xref: /freebsd/sys/kern/subr_stats.c (revision e0c4386e)
1 /*-
2  * Copyright (c) 2014-2018 Netflix, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * Author: Lawrence Stewart <lstewart@netflix.com>
29  */
30 
31 #include <sys/param.h>
32 #include <sys/arb.h>
33 #include <sys/ctype.h>
34 #include <sys/errno.h>
35 #include <sys/hash.h>
36 #include <sys/limits.h>
37 #include <sys/malloc.h>
38 #include <sys/qmath.h>
39 #include <sys/sbuf.h>
40 #if defined(DIAGNOSTIC)
41 #include <sys/tree.h>
42 #endif
43 #include <sys/stats.h> /* Must come after qmath.h and arb.h */
44 #include <sys/stddef.h>
45 #include <sys/stdint.h>
46 #include <sys/time.h>
47 
48 #ifdef _KERNEL
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/rwlock.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #else /* ! _KERNEL */
55 #include <pthread.h>
56 #include <stdbool.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #endif /* _KERNEL */
61 
/*
 * Data stored for the VSD_DTYPE_VOISTATE data type (this struct's size is the
 * vsd_dtype2size[] entry for that type).
 */
62 struct voistatdata_voistate {
63 	/* Previous VOI value for diff calculation. */
64 	struct voistatdata_numeric prev;
65 };
66 
/*
 * Per-stat descriptor: records the stat's type, the type and size of its
 * data, and the blob offset at which that data lives.
 */
67 #define	VS_VSDVALID	0x0001	/* Stat's voistatdata updated at least once. */
68 struct voistat {
69 	int8_t		stype;		/* Type of stat e.g. VS_STYPE_SUM. */
70 	enum vsd_dtype	dtype : 8;	/* Data type of this stat's data. */
71 	uint16_t	data_off;	/* Blob offset for this stat's data. */
72 	uint16_t	dsz;		/* Size of stat's data. */
	/* errs and flags are bit-fields that together occupy 16 bits. */
73 #define	VS_EBITS 8
74 	uint16_t	errs : VS_EBITS;/* Non-wrapping error count. */
75 	uint16_t	flags : 16 - VS_EBITS; /* Presumably VS_* flags, e.g. VS_VSDVALID - confirm. */
76 };
77 /*
 * The voistat error count is capped to avoid wrapping: errs is only
 * incremented while it is below the largest value that fits in VS_EBITS bits.
 * Note that (vs) is evaluated more than once.
 */
78 #define	VS_INCERRS(vs) do {						\
79 	if ((vs)->errs < (1U << VS_EBITS) - 1)				\
80 		(vs)->errs++;						\
81 } while (0)
82 
83 /*
84  * Ideas for flags:
85  *   - Global or entity specific (global would imply use of counter(9)?)
86  *   - Whether to reset stats on read or not
87  *   - Signal an overflow?
88  *   - Compressed voistat array
89  */
/*
 * Per-VOI descriptor stored in the blob's vois[] array. stats_off locates this
 * VOI's stats within the blob and voistatmaxid bounds the stat indices in use.
 */
90 #define	VOI_REQSTATE	0x0001	/* VOI requires VS_STYPE_VOISTATE. */
91 struct voi {
92 	int16_t		id;		/* VOI id. */
93 	enum vsd_dtype	dtype : 8;	/* Data type of the VOI itself. */
94 	int8_t		voistatmaxid;	/* Largest allocated voistat index. */
95 	uint16_t	stats_off;	/* Blob offset for this VOIs stats. */
96 	uint16_t	flags;		/* Presumably VOI_* flags, e.g. VOI_REQSTATE - confirm. */
97 };
98 
99 /*
100  * Memory for the entire blob is allocated as a slab and then offsets are
101  * maintained to carve up the slab into sections holding different data types.
102  *
103  * Ideas for flags:
104  * - Compressed voi array (trade off memory usage vs search time)
105  * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/Mib)
106  */
/*
 * Version 1 blob header followed by the vois[] array. The structure is aligned
 * to the size of a pointer. The static assertion below requires that the
 * fields up to and including cursz end exactly where the "opaque" member of
 * struct statsblob begins.
 */
107 struct statsblobv1 {
108 	uint8_t		abi;
109 	uint8_t		endian;
110 	uint16_t	flags;
111 	uint16_t	maxsz;
112 	uint16_t	cursz;
113 	/* Fields from here down are opaque to consumers. */
114 	uint32_t	tplhash;	/* Base template hash ID. */
115 	uint16_t	stats_off;	/* voistat array blob offset. */
116 	uint16_t	statsdata_off;	/* voistatdata array blob offset. */
117 	sbintime_t	created;	/* Blob creation time. */
118 	sbintime_t	lastrst;	/* Time of last reset. */
119 	struct voi	vois[];		/* Array indexed by [voi_id]. */
120 } __aligned(sizeof(void *));
121 _Static_assert(offsetof(struct statsblobv1, cursz) +
122     SIZEOF_MEMBER(struct statsblobv1, cursz) ==
123     offsetof(struct statsblob, opaque),
124     "statsblobv1 ABI mismatch");
125 
/* Pairs a metablob pointer with a version 1 stats blob pointer. */
126 struct statsblobv1_tpl {
127 	struct metablob		*mb;
128 	struct statsblobv1	*sb;
129 };
130 
131 /* Context passed to iterator callbacks (see stats_v1_blob_itercb_t). */
132 struct sb_iter_ctx {
133 	void		*usrctx;	/* Caller supplied context. */
134 	uint32_t	flags;		/* Flags for current iteration. */
135 	int16_t		vslot;		/* struct voi slot index. */
136 	int8_t		vsslot;		/* struct voistat slot index. */
137 };
138 
/*
 * Bundles an sbuf, a template, a string format selector and flags.
 * NOTE(review): going by the name, this is the context for a blob-to-string
 * callback; its use is outside this view - confirm.
 */
139 struct sb_tostrcb_ctx {
140 	struct sbuf		*buf;
141 	struct statsblob_tpl	*tpl;
142 	enum sb_str_fmt	fmt;
143 	uint32_t		flags;
144 };
145 
/*
 * Bundles a stats_blob_visitcb_t callback with a caller-supplied context
 * pointer. NOTE(review): its use is outside this view - confirm.
 */
146 struct sb_visitcb_ctx {
147 	stats_blob_visitcb_t	cb;
148 	void			*usrctx;
149 };
150 
151 /*
 * Stats blob iterator callback. It is handed the blob, a VOI, a voistat and
 * the iteration context, and returns an int.
 */
152 typedef int (*stats_v1_blob_itercb_t)(struct statsblobv1 *sb, struct voi *v,
153     struct voistat *vs, struct sb_iter_ctx *ctx);
154 
/*
 * Platform glue. Both variants provide the tpllistlock read/write lock with
 * TPL_LIST_* wrappers and a stats_free() macro. The kernel build uses
 * rwlock(9) and a dedicated malloc type; the userland build uses a pthread
 * rwlock and supplies stand-ins for KASSERT() and panic().
 */
155 #ifdef _KERNEL
156 static struct rwlock tpllistlock;
157 RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock");
158 #define	TPL_LIST_RLOCK() rw_rlock(&tpllistlock)
159 #define	TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock)
160 #define	TPL_LIST_WLOCK() rw_wlock(&tpllistlock)
161 #define	TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock)
162 #define	TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED)
163 #define	TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED)
164 #define	TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED)
/* Malloc type used by stats_realloc(), stats_strdup() and stats_free(). */
165 MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory");
166 #define	stats_free(ptr) free((ptr), M_STATS)
167 #else /* ! _KERNEL */
168 static void stats_constructor(void);
169 static void stats_destructor(void);
170 static pthread_rwlock_t tpllistlock;
171 #define	TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock)
172 #define	TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock)
173 #define	TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK()
174 #define	TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock)
175 #define	TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK()
/* Lock assertions are no-ops in userland. */
176 #define	TPL_LIST_LOCK_ASSERT() do { } while (0)
177 #define	TPL_LIST_RLOCK_ASSERT() do { } while (0)
178 #define	TPL_LIST_WLOCK_ASSERT() do { } while (0)
/*
 * With NDEBUG defined, KASSERT() and stats_abort() expand to nothing, so the
 * userland panic() below only prints its message. Otherwise a failed
 * KASSERT() calls panic() with the parenthesised msg argument list.
 */
179 #ifdef NDEBUG
180 #define	KASSERT(cond, msg) do {} while (0)
181 #define	stats_abort() do {} while (0)
182 #else /* ! NDEBUG */
183 #define	KASSERT(cond, msg) do { \
184 	if (!(cond)) { \
185 		panic msg; \
186 	} \
187 } while (0)
188 #define	stats_abort() abort()
189 #endif /* NDEBUG */
190 #define	stats_free(ptr) free(ptr)
/* Userland panic(): print the formatted message to stderr, then stats_abort(). */
191 #define	panic(fmt, ...) do { \
192 	fprintf(stderr, (fmt), ##__VA_ARGS__); \
193 	stats_abort(); \
194 } while (0)
195 #endif /* _KERNEL */
196 
/*
 * NOTE(review): 65535 is the largest value the uint16_t maxsz/cursz fields of
 * struct statsblobv1 can hold; presumably the v1 blob size limit - confirm.
 */
197 #define	SB_V1_MAXSZ 65535
198 
199 /* Obtain a blob offset pointer: sb advanced by off bytes, as a void *. */
200 #define	BLOB_OFFSET(sb, off) ((void *)(((uint8_t *)(sb)) + (off)))
201 
202 /*
203  * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a
204  * power of 2 size, we can shift instead of divide. The shift amount must be
205  * updated if sizeof(struct voi) ever changes, which the assert should catch.
 *
 * The count is the number of bytes between the end of the fixed
 * struct statsblobv1 header and stats_off, divided by 8 (>> 3).
206  */
207 #define	NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \
208     sizeof(struct statsblobv1)) >> 3))
209 _Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch");
210 
211 /*
 * Short name for each stat type, indexed by VS_STYPE_*. Try to restrict names
 * to alphanumerics and underscores to simplify JSON compatibility.
 */
212 const char *vs_stype2name[VS_NUM_STYPES] = {
213 	[VS_STYPE_VOISTATE] = "VOISTATE",
214 	[VS_STYPE_SUM] = "SUM",
215 	[VS_STYPE_MAX] = "MAX",
216 	[VS_STYPE_MIN] = "MIN",
217 	[VS_STYPE_HIST] = "HIST",
218 	[VS_STYPE_TDGST] = "TDGST",
219 };
220 
/* Human-readable description of each stat type, indexed by VS_STYPE_*. */
221 const char *vs_stype2desc[VS_NUM_STYPES] = {
222 	[VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)",
223 	[VS_STYPE_SUM] = "Simple arithmetic accumulator",
224 	[VS_STYPE_MAX] = "Maximum observed VOI value",
225 	[VS_STYPE_MIN] = "Minimum observed VOI value",
226 	[VS_STYPE_HIST] = "Histogram of observed VOI values",
227 	[VS_STYPE_TDGST] = "t-digest of observed VOI values",
228 };
229 
/* Short name for each stat data type, indexed by VSD_DTYPE_*. */
230 const char *vsd_dtype2name[VSD_NUM_DTYPES] = {
231 	[VSD_DTYPE_VOISTATE] = "VOISTATE",
232 	[VSD_DTYPE_INT_S32] = "INT_S32",
233 	[VSD_DTYPE_INT_U32] = "INT_U32",
234 	[VSD_DTYPE_INT_S64] = "INT_S64",
235 	[VSD_DTYPE_INT_U64] = "INT_U64",
236 	[VSD_DTYPE_INT_SLONG] = "INT_SLONG",
237 	[VSD_DTYPE_INT_ULONG] = "INT_ULONG",
238 	[VSD_DTYPE_Q_S32] = "Q_S32",
239 	[VSD_DTYPE_Q_U32] = "Q_U32",
240 	[VSD_DTYPE_Q_S64] = "Q_S64",
241 	[VSD_DTYPE_Q_U64] = "Q_U64",
242 	[VSD_DTYPE_CRHIST32] = "CRHIST32",
243 	[VSD_DTYPE_DRHIST32] = "DRHIST32",
244 	[VSD_DTYPE_DVHIST32] = "DVHIST32",
245 	[VSD_DTYPE_CRHIST64] = "CRHIST64",
246 	[VSD_DTYPE_DRHIST64] = "DRHIST64",
247 	[VSD_DTYPE_DVHIST64] = "DVHIST64",
248 	[VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32",
249 	[VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64",
250 };
251 
/*
 * sizeof() of the voistatdata structure used for each data type, indexed by
 * VSD_DTYPE_*. Signed and unsigned variants of a type share one structure.
 */
252 const size_t vsd_dtype2size[VSD_NUM_DTYPES] = {
253 	[VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate),
254 	[VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32),
255 	[VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32),
256 	[VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64),
257 	[VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64),
258 	[VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong),
259 	[VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong),
260 	[VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32),
261 	[VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32),
262 	[VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64),
263 	[VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64),
264 	[VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32),
265 	[VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32),
266 	[VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32),
267 	[VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64),
268 	[VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64),
269 	[VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64),
270 	[VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32),
271 	[VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64),
272 };
273 
/*
 * Per data type flag, indexed by VSD_DTYPE_*: true for VOISTATE, the
 * histogram types and the t-digest cluster types; false for the plain integer
 * and Q number types.
 */
274 static const bool vsd_compoundtype[VSD_NUM_DTYPES] = {
275 	[VSD_DTYPE_VOISTATE] = true,
276 	[VSD_DTYPE_INT_S32] = false,
277 	[VSD_DTYPE_INT_U32] = false,
278 	[VSD_DTYPE_INT_S64] = false,
279 	[VSD_DTYPE_INT_U64] = false,
280 	[VSD_DTYPE_INT_SLONG] = false,
281 	[VSD_DTYPE_INT_ULONG] = false,
282 	[VSD_DTYPE_Q_S32] = false,
283 	[VSD_DTYPE_Q_U32] = false,
284 	[VSD_DTYPE_Q_S64] = false,
285 	[VSD_DTYPE_Q_U64] = false,
286 	[VSD_DTYPE_CRHIST32] = true,
287 	[VSD_DTYPE_DRHIST32] = true,
288 	[VSD_DTYPE_DVHIST32] = true,
289 	[VSD_DTYPE_CRHIST64] = true,
290 	[VSD_DTYPE_DRHIST64] = true,
291 	[VSD_DTYPE_DVHIST64] = true,
292 	[VSD_DTYPE_TDGSTCLUST32] = true,
293 	[VSD_DTYPE_TDGSTCLUST64] = true,
294 };
295 
/*
 * Smallest (LIM_MIN) and largest (LIM_MAX) value for each numeric data type,
 * indexed as numeric_limits[LIM_*][VSD_DTYPE_*]. The second dimension only
 * covers VSD_DTYPE_VOISTATE through VSD_DTYPE_Q_U64, so it must not be indexed
 * with any later data type. The VOISTATE entries are zero.
 */
296 const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = {
297 	[LIM_MIN] = {
298 		[VSD_DTYPE_VOISTATE] = {0},
299 		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}},
300 		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}},
301 		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}},
302 		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}},
303 		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}},
304 		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}},
305 		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}},
306 		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = 0}},
307 		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}},
308 		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}},
309 	},
310 	[LIM_MAX] = {
311 		[VSD_DTYPE_VOISTATE] = {0},
312 		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}},
313 		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}},
314 		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}},
315 		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}},
316 		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}},
317 		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}},
318 		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}},
319 		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}},
320 		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}},
321 		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}},
322 	}
323 };
324 
324 /*
 * tpllistlock protects tpllist and ntpl. tpllist is an array of template
 * pointers indexed by template id; ntpl is the number of valid entries (ids
 * are range-checked against it before tpllist is indexed).
 */
325 static uint32_t ntpl;
326 static struct statsblob_tpl **tpllist;
328 
/* Forward declarations of file-local (static) functions. */
328 static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz,
329     int flags);
/* NOTE(review): the next prototype is commented out in the original. */
330 //static void stats_v1_blob_finalise(struct statsblobv1 *sb);
331 static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
332     uint32_t flags);
333 static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
334     int newvoistatbytes, int newvoistatdatabytes);
335 static void stats_v1_blob_iter(struct statsblobv1 *sb,
336     stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags);
337 static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype,
338     struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt);
340 
341 static inline int
342 ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2)
343 {
344 
345 	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
346 	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
347 	    Q_RELPREC(c1->mu, c2->mu)));
348 
349        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
350 }
351 ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp);
352 
353 static inline int
354 ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2)
355 {
356 
357 	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
358 	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
359 	    Q_RELPREC(c1->mu, c2->mu)));
360 
361        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
362 }
363 ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp);
364 
/*
 * DIAGNOSTIC builds additionally generate RB tree functions over the same
 * node types, linked through rblnk and ordered by the same comparators.
 */
364 #ifdef DIAGNOSTIC
365 RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp);
366 RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp);
367 #endif
369 
370 static inline sbintime_t
371 stats_sbinuptime(void)
372 {
373 	sbintime_t sbt;
374 #ifdef _KERNEL
375 
376 	sbt = sbinuptime();
377 #else /* ! _KERNEL */
378 	struct timespec tp;
379 
380 	clock_gettime(CLOCK_MONOTONIC_FAST, &tp);
381 	sbt = tstosbt(tp);
382 #endif /* _KERNEL */
383 
384 	return (sbt);
385 }
386 
387 static inline void *
388 stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags)
389 {
390 
391 #ifdef _KERNEL
392 	/* Default to M_NOWAIT if neither M_NOWAIT or M_WAITOK are set. */
393 	if (!(flags & (M_WAITOK | M_NOWAIT)))
394 		flags |= M_NOWAIT;
395 	ptr = realloc(ptr, newsz, M_STATS, flags);
396 #else /* ! _KERNEL */
397 	ptr = realloc(ptr, newsz);
398 	if ((flags & M_ZERO) && ptr != NULL) {
399 		if (oldsz == 0)
400 			memset(ptr, '\0', newsz);
401 		else if (newsz > oldsz)
402 			memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz);
403 	}
404 #endif /* _KERNEL */
405 
406 	return (ptr);
407 }
408 
/*
 * Duplicate the NUL-terminated string s.
 *
 * In the kernel the copy is allocated from M_STATS using flags (M_NOWAIT is
 * assumed when neither M_WAITOK nor M_NOWAIT is given). In userland flags is
 * ignored and strdup() is used.
 *
 * Returns the copy, or NULL if the allocation failed.
 */
static inline char *
stats_strdup(const char *s, int flags)
{
#ifdef _KERNEL
	char *dup;
	size_t nbytes;

	if ((flags & (M_WAITOK | M_NOWAIT)) == 0)
		flags |= M_NOWAIT;

	/* Copy the terminating NUL as well. */
	nbytes = strlen(s) + 1;
	dup = malloc(nbytes, M_STATS, flags);
	if (dup != NULL)
		bcopy(s, dup, nbytes);

	return (dup);
#else /* ! _KERNEL */
	(void)flags;

	return (strdup(s));
#endif /* _KERNEL */
}
431 
432 static inline void
433 stats_tpl_update_hash(struct statsblob_tpl *tpl)
434 {
435 
436 	TPL_LIST_WLOCK_ASSERT();
437 	tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0);
438 	for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) {
439 		if (tpl->mb->voi_meta[voi_id].name != NULL)
440 			tpl->mb->tplhash = hash32_str(
441 			    tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash);
442 	}
443 	tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz,
444 	    tpl->mb->tplhash);
445 }
446 
/*
 * Compute base raised to the power exp by repeated squaring. The arithmetic
 * is unsigned 64-bit, so results wrap modulo 2^64. Any base raised to 0
 * (including 0) yields 1.
 */
static inline uint64_t
stats_pow_u64(uint64_t base, uint64_t exp)
{
	uint64_t acc;

	for (acc = 1; exp != 0; exp >>= 1, base *= base) {
		/* Multiply in the current square when this exponent bit is set. */
		if ((exp & 1) != 0)
			acc *= base;
	}

	return (acc);
}
461 
462 static inline int
463 stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt,
464     struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub)
465 {
466 	uint64_t step = 0;
467 	int error = 0;
468 
469 	switch (info->scheme) {
470 	case BKT_LIN:
471 		step = info->lin.stepinc;
472 		break;
473 	case BKT_EXP:
474 		step = stats_pow_u64(info->exp.stepbase,
475 		    info->exp.stepexp + curbkt);
476 		break;
477 	case BKT_LINEXP:
478 		{
479 		uint64_t curstepexp = 1;
480 
481 		switch (info->voi_dtype) {
482 		case VSD_DTYPE_INT_S32:
483 			while ((int32_t)stats_pow_u64(info->linexp.stepbase,
484 			    curstepexp) <= bkt_lb->int32.s32)
485 				curstepexp++;
486 			break;
487 		case VSD_DTYPE_INT_U32:
488 			while ((uint32_t)stats_pow_u64(info->linexp.stepbase,
489 			    curstepexp) <= bkt_lb->int32.u32)
490 				curstepexp++;
491 			break;
492 		case VSD_DTYPE_INT_S64:
493 			while ((int64_t)stats_pow_u64(info->linexp.stepbase,
494 			    curstepexp) <= bkt_lb->int64.s64)
495 				curstepexp++;
496 			break;
497 		case VSD_DTYPE_INT_U64:
498 			while ((uint64_t)stats_pow_u64(info->linexp.stepbase,
499 			    curstepexp) <= bkt_lb->int64.u64)
500 				curstepexp++;
501 			break;
502 		case VSD_DTYPE_INT_SLONG:
503 			while ((long)stats_pow_u64(info->linexp.stepbase,
504 			    curstepexp) <= bkt_lb->intlong.slong)
505 				curstepexp++;
506 			break;
507 		case VSD_DTYPE_INT_ULONG:
508 			while ((unsigned long)stats_pow_u64(info->linexp.stepbase,
509 			    curstepexp) <= bkt_lb->intlong.ulong)
510 				curstepexp++;
511 			break;
512 		case VSD_DTYPE_Q_S32:
513 			while ((s32q_t)stats_pow_u64(info->linexp.stepbase,
514 			    curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32))
515 			break;
516 		case VSD_DTYPE_Q_U32:
517 			while ((u32q_t)stats_pow_u64(info->linexp.stepbase,
518 			    curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32))
519 			break;
520 		case VSD_DTYPE_Q_S64:
521 			while ((s64q_t)stats_pow_u64(info->linexp.stepbase,
522 			    curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64))
523 				curstepexp++;
524 			break;
525 		case VSD_DTYPE_Q_U64:
526 			while ((u64q_t)stats_pow_u64(info->linexp.stepbase,
527 			    curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64))
528 				curstepexp++;
529 			break;
530 		default:
531 			break;
532 		}
533 
534 		step = stats_pow_u64(info->linexp.stepbase, curstepexp) /
535 		    info->linexp.linstepdiv;
536 		if (step == 0)
537 			step = 1;
538 		break;
539 		}
540 	default:
541 		break;
542 	}
543 
544 	if (info->scheme == BKT_USR) {
545 		*bkt_lb = info->usr.bkts[curbkt].lb;
546 		*bkt_ub = info->usr.bkts[curbkt].ub;
547 	} else if (step != 0) {
548 		switch (info->voi_dtype) {
549 		case VSD_DTYPE_INT_S32:
550 			bkt_ub->int32.s32 += (int32_t)step;
551 			break;
552 		case VSD_DTYPE_INT_U32:
553 			bkt_ub->int32.u32 += (uint32_t)step;
554 			break;
555 		case VSD_DTYPE_INT_S64:
556 			bkt_ub->int64.s64 += (int64_t)step;
557 			break;
558 		case VSD_DTYPE_INT_U64:
559 			bkt_ub->int64.u64 += (uint64_t)step;
560 			break;
561 		case VSD_DTYPE_INT_SLONG:
562 			bkt_ub->intlong.slong += (long)step;
563 			break;
564 		case VSD_DTYPE_INT_ULONG:
565 			bkt_ub->intlong.ulong += (unsigned long)step;
566 			break;
567 		case VSD_DTYPE_Q_S32:
568 			error = Q_QADDI(&bkt_ub->q32.sq32, step);
569 			break;
570 		case VSD_DTYPE_Q_U32:
571 			error = Q_QADDI(&bkt_ub->q32.uq32, step);
572 			break;
573 		case VSD_DTYPE_Q_S64:
574 			error = Q_QADDI(&bkt_ub->q64.sq64, step);
575 			break;
576 		case VSD_DTYPE_Q_U64:
577 			error = Q_QADDI(&bkt_ub->q64.uq64, step);
578 			break;
579 		default:
580 			break;
581 		}
582 	} else { /* info->scheme != BKT_USR && step == 0 */
583 		return (EINVAL);
584 	}
585 
586 	return (error);
587 }
588 
589 static uint32_t
590 stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info)
591 {
592 	struct voistatdata_numeric bkt_lb, bkt_ub;
593 	uint32_t nbkts;
594 	int done;
595 
596 	if (info->scheme == BKT_USR) {
597 		/* XXXLAS: Setting info->{lb,ub} from macro is tricky. */
598 		info->lb = info->usr.bkts[0].lb;
599 		info->ub = info->usr.bkts[info->usr.nbkts - 1].lb;
600 	}
601 
602 	nbkts = 0;
603 	done = 0;
604 	bkt_ub = info->lb;
605 
606 	do {
607 		bkt_lb = bkt_ub;
608 		if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub))
609 			return (0);
610 
611 		if (info->scheme == BKT_USR)
612 			done = (nbkts == info->usr.nbkts);
613 		else {
614 			switch (info->voi_dtype) {
615 			case VSD_DTYPE_INT_S32:
616 				done = (bkt_ub.int32.s32 > info->ub.int32.s32);
617 				break;
618 			case VSD_DTYPE_INT_U32:
619 				done = (bkt_ub.int32.u32 > info->ub.int32.u32);
620 				break;
621 			case VSD_DTYPE_INT_S64:
622 				done = (bkt_ub.int64.s64 > info->ub.int64.s64);
623 				break;
624 			case VSD_DTYPE_INT_U64:
625 				done = (bkt_ub.int64.u64 > info->ub.int64.u64);
626 				break;
627 			case VSD_DTYPE_INT_SLONG:
628 				done = (bkt_ub.intlong.slong >
629 				    info->ub.intlong.slong);
630 				break;
631 			case VSD_DTYPE_INT_ULONG:
632 				done = (bkt_ub.intlong.ulong >
633 				    info->ub.intlong.ulong);
634 				break;
635 			case VSD_DTYPE_Q_S32:
636 				done = Q_QGTQ(bkt_ub.q32.sq32,
637 				    info->ub.q32.sq32);
638 				break;
639 			case VSD_DTYPE_Q_U32:
640 				done = Q_QGTQ(bkt_ub.q32.uq32,
641 				    info->ub.q32.uq32);
642 				break;
643 			case VSD_DTYPE_Q_S64:
644 				done = Q_QGTQ(bkt_ub.q64.sq64,
645 				    info->ub.q64.sq64);
646 				break;
647 			case VSD_DTYPE_Q_U64:
648 				done = Q_QGTQ(bkt_ub.q64.uq64,
649 				    info->ub.q64.uq64);
650 				break;
651 			default:
652 				return (0);
653 			}
654 		}
655 	} while (!done);
656 
657 	if (info->flags & VSD_HIST_LBOUND_INF)
658 		nbkts++;
659 	if (info->flags & VSD_HIST_UBOUND_INF)
660 		nbkts++;
661 
662 	return (nbkts);
663 }
664 
665 int
666 stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
667     struct vss_hist_hlpr_info *info)
668 {
669 	struct voistatdata_hist *hist;
670 	struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub,
671 	    *ubinfbktlb, *ubinfbktub;
672 	uint32_t bkt, nbkts, nloop;
673 
674 	if (vss == NULL || info == NULL || (info->flags &
675 	(VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype ==
676 	VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64)))
677 		return (EINVAL);
678 
679 	info->voi_dtype = voi_dtype;
680 
681 	if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0)
682 		return (EINVAL);
683 
684 	switch (info->hist_dtype) {
685 	case VSD_DTYPE_CRHIST32:
686 		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts);
687 		break;
688 	case VSD_DTYPE_DRHIST32:
689 		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts);
690 		break;
691 	case VSD_DTYPE_DVHIST32:
692 		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts);
693 		break;
694 	case VSD_DTYPE_CRHIST64:
695 		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts);
696 		break;
697 	case VSD_DTYPE_DRHIST64:
698 		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts);
699 		break;
700 	case VSD_DTYPE_DVHIST64:
701 		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts);
702 		break;
703 	default:
704 		return (EINVAL);
705 	}
706 
707 	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
708 	if (vss->iv == NULL)
709 		return (ENOMEM);
710 
711 	hist = (struct voistatdata_hist *)vss->iv;
712 	bkt_ub = info->lb;
713 
714 	for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0;
715 	    bkt < nbkts;
716 	    bkt++, nloop++) {
717 		bkt_lb = bkt_ub;
718 		if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub))
719 			return (EINVAL);
720 
721 		switch (info->hist_dtype) {
722 		case VSD_DTYPE_CRHIST32:
723 			VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb;
724 			break;
725 		case VSD_DTYPE_DRHIST32:
726 			VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb;
727 			VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub;
728 			break;
729 		case VSD_DTYPE_DVHIST32:
730 			VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb;
731 			break;
732 		case VSD_DTYPE_CRHIST64:
733 			VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb;
734 			break;
735 		case VSD_DTYPE_DRHIST64:
736 			VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb;
737 			VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub;
738 			break;
739 		case VSD_DTYPE_DVHIST64:
740 			VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb;
741 			break;
742 		default:
743 			return (EINVAL);
744 		}
745 	}
746 
747 	lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL;
748 
749 	switch (info->hist_dtype) {
750 	case VSD_DTYPE_CRHIST32:
751 		lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb;
752 		ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb;
753 		break;
754 	case VSD_DTYPE_DRHIST32:
755 		lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb;
756 		lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub;
757 		ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb;
758 		ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub;
759 		break;
760 	case VSD_DTYPE_CRHIST64:
761 		lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb;
762 		ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb;
763 		break;
764 	case VSD_DTYPE_DRHIST64:
765 		lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb;
766 		lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub;
767 		ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb;
768 		ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub;
769 		break;
770 	case VSD_DTYPE_DVHIST32:
771 	case VSD_DTYPE_DVHIST64:
772 		break;
773 	default:
774 		return (EINVAL);
775 	}
776 
777 	if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) {
778 		*lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype];
779 		/*
780 		 * Assignment from numeric_limit array for Q types assigns max
781 		 * possible integral/fractional value for underlying data type,
782 		 * but we must set control bits for this specific histogram per
783 		 * the user's choice of fractional bits, which we extract from
784 		 * info->lb.
785 		 */
786 		if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
787 		    info->voi_dtype == VSD_DTYPE_Q_U32) {
788 			/* Signedness doesn't matter for setting control bits. */
789 			Q_SCVAL(lbinfbktlb->q32.sq32,
790 			    Q_GCVAL(info->lb.q32.sq32));
791 		} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
792 		    info->voi_dtype == VSD_DTYPE_Q_U64) {
793 			/* Signedness doesn't matter for setting control bits. */
794 			Q_SCVAL(lbinfbktlb->q64.sq64,
795 			    Q_GCVAL(info->lb.q64.sq64));
796 		}
797 		if (lbinfbktub)
798 			*lbinfbktub = info->lb;
799 	}
800 	if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) {
801 		*ubinfbktlb = bkt_lb;
802 		if (ubinfbktub) {
803 			*ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype];
804 			if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
805 			    info->voi_dtype == VSD_DTYPE_Q_U32) {
806 				Q_SCVAL(ubinfbktub->q32.sq32,
807 				    Q_GCVAL(info->lb.q32.sq32));
808 			} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
809 			    info->voi_dtype == VSD_DTYPE_Q_U64) {
810 				Q_SCVAL(ubinfbktub->q64.sq64,
811 				    Q_GCVAL(info->lb.q64.sq64));
812 			}
813 		}
814 	}
815 
816 	return (0);
817 }
818 
819 int
820 stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
821     struct vss_tdgst_hlpr_info *info)
822 {
823 	struct voistatdata_tdgst *tdgst;
824 	struct ctdth32 *ctd32tree;
825 	struct ctdth64 *ctd64tree;
826 	struct voistatdata_tdgstctd32 *ctd32;
827 	struct voistatdata_tdgstctd64 *ctd64;
828 
829 	info->voi_dtype = voi_dtype;
830 
831 	switch (info->tdgst_dtype) {
832 	case VSD_DTYPE_TDGSTCLUST32:
833 		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds);
834 		break;
835 	case VSD_DTYPE_TDGSTCLUST64:
836 		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds);
837 		break;
838 	default:
839 		return (EINVAL);
840 	}
841 
842 	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
843 	if (vss->iv == NULL)
844 		return (ENOMEM);
845 
846 	tdgst = (struct voistatdata_tdgst *)vss->iv;
847 
848 	switch (info->tdgst_dtype) {
849 	case VSD_DTYPE_TDGSTCLUST32:
850 		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
851 		ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) {
852 			Q_INI(&ctd32->mu, 0, 0, info->prec);
853 		}
854 		break;
855 	case VSD_DTYPE_TDGSTCLUST64:
856 		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
857 		ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) {
858 			Q_INI(&ctd64->mu, 0, 0, info->prec);
859 		}
860 		break;
861 	default:
862 		return (EINVAL);
863 	}
864 
865 	return (0);
866 }
867 
868 int
869 stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
870     struct vss_numeric_hlpr_info *info)
871 {
872 	struct voistatdata_numeric iv;
873 
874 	switch (vss->stype) {
875 	case VS_STYPE_SUM:
876 		iv = stats_ctor_vsd_numeric(0);
877 		break;
878 	case VS_STYPE_MIN:
879 		iv = numeric_limits[LIM_MAX][voi_dtype];
880 		break;
881 	case VS_STYPE_MAX:
882 		iv = numeric_limits[LIM_MIN][voi_dtype];
883 		break;
884 	default:
885 		return (EINVAL);
886 	}
887 
888 	vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0);
889 	if (vss->iv == NULL)
890 		return (ENOMEM);
891 
892 	vss->vs_dtype = voi_dtype;
893 	vss->vsdsz = vsd_dtype2size[voi_dtype];
894 	switch (voi_dtype) {
895 	case VSD_DTYPE_INT_S32:
896 		*((int32_t *)vss->iv) = iv.int32.s32;
897 		break;
898 	case VSD_DTYPE_INT_U32:
899 		*((uint32_t *)vss->iv) = iv.int32.u32;
900 		break;
901 	case VSD_DTYPE_INT_S64:
902 		*((int64_t *)vss->iv) = iv.int64.s64;
903 		break;
904 	case VSD_DTYPE_INT_U64:
905 		*((uint64_t *)vss->iv) = iv.int64.u64;
906 		break;
907 	case VSD_DTYPE_INT_SLONG:
908 		*((long *)vss->iv) = iv.intlong.slong;
909 		break;
910 	case VSD_DTYPE_INT_ULONG:
911 		*((unsigned long *)vss->iv) = iv.intlong.ulong;
912 		break;
913 	case VSD_DTYPE_Q_S32:
914 		*((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32,
915 		    Q_CTRLINI(info->prec));
916 		break;
917 	case VSD_DTYPE_Q_U32:
918 		*((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32,
919 		    Q_CTRLINI(info->prec));
920 		break;
921 	case VSD_DTYPE_Q_S64:
922 		*((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64,
923 		    Q_CTRLINI(info->prec));
924 		break;
925 	case VSD_DTYPE_Q_U64:
926 		*((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64,
927 		    Q_CTRLINI(info->prec));
928 		break;
929 	default:
930 		break;
931 	}
932 
933 	return (0);
934 }
935 
936 int
937 stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
938     struct voistatspec *vss)
939 {
940 	int i, ret;
941 
942 	for (i = nvss - 1; i >= 0; i--) {
943 		if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i],
944 		    vss[i].hlprinfo)) != 0)
945 			return (ret);
946 	}
947 
948 	return (0);
949 }
950 
951 void
952 stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss)
953 {
954 	int i;
955 
956 	for (i = nvss - 1; i >= 0; i--) {
957 		if (vss[i].hlpr) {
958 			stats_free((void *)vss[i].iv);
959 			vss[i].iv = NULL;
960 		}
961 	}
962 }
963 
964 int
965 stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl)
966 {
967 	int error;
968 
969 	error = 0;
970 
971 	TPL_LIST_WLOCK();
972 	if (tpl_id < 0 || tpl_id >= (int)ntpl) {
973 		error = ENOENT;
974 	} else {
975 		*tpl = tpllist[tpl_id];
976 		/* XXXLAS: Acquire refcount on tpl. */
977 	}
978 	TPL_LIST_WUNLOCK();
979 
980 	return (error);
981 }
982 
983 int
984 stats_tpl_fetch_allocid(const char *name, uint32_t hash)
985 {
986 	int i, tpl_id;
987 
988 	tpl_id = -ESRCH;
989 
990 	TPL_LIST_RLOCK();
991 	for (i = ntpl - 1; i >= 0; i--) {
992 		if (name != NULL) {
993 			if (strlen(name) == strlen(tpllist[i]->mb->tplname) &&
994 			    strncmp(name, tpllist[i]->mb->tplname,
995 			    TPL_MAX_NAME_LEN) == 0 && (!hash || hash ==
996 			    tpllist[i]->mb->tplhash)) {
997 				tpl_id = i;
998 				break;
999 			}
1000 		} else if (hash == tpllist[i]->mb->tplhash) {
1001 			tpl_id = i;
1002 			break;
1003 		}
1004 	}
1005 	TPL_LIST_RUNLOCK();
1006 
1007 	return (tpl_id);
1008 }
1009 
1010 int
1011 stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len)
1012 {
1013 	int error;
1014 
1015 	error = 0;
1016 
1017 	TPL_LIST_RLOCK();
1018 	if (tpl_id < ntpl) {
1019 		if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname))
1020 			strlcpy(buf, tpllist[tpl_id]->mb->tplname, len);
1021 		else
1022 			error = EOVERFLOW;
1023 	} else
1024 		error = ENOENT;
1025 	TPL_LIST_RUNLOCK();
1026 
1027 	return (error);
1028 }
1029 
1030 int
1031 stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates,
1032     void *seed_bytes, size_t seed_len)
1033 {
1034 	uint32_t cum_pct, rnd_pct;
1035 	int i;
1036 
1037 	cum_pct = 0;
1038 
1039 	/*
1040 	 * Choose a pseudorandom or seeded number in range [0,100] and use
1041 	 * it to make a sampling decision and template selection where required.
1042 	 * If no seed is supplied, a PRNG is used to generate a pseudorandom
1043 	 * number so that every selection is independent. If a seed is supplied,
1044 	 * the caller desires random selection across different seeds, but
1045 	 * deterministic selection given the same seed. This is achieved by
1046 	 * hashing the seed and using the hash as the random number source.
1047 	 *
1048 	 * XXXLAS: Characterise hash function output distribution.
1049 	 */
1050 	if (seed_bytes == NULL)
1051 		rnd_pct = random() / (INT32_MAX / 100);
1052 	else
1053 		rnd_pct = hash32_buf(seed_bytes, seed_len, 0) /
1054 		    (UINT32_MAX / 100U);
1055 
1056 	/*
1057 	 * We map the randomly selected percentage on to the interval [0,100]
1058 	 * consisting of the cumulatively summed template sampling percentages.
1059 	 * The difference between the cumulative sum of all template sampling
1060 	 * percentages and 100 is treated as a NULL assignment i.e. no stats
1061 	 * template will be assigned, and -1 returned instead.
1062 	 */
1063 	for (i = 0; i < nrates; i++) {
1064 		cum_pct += rates[i].tpl_sample_pct;
1065 
1066 		KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__,
1067 		    cum_pct));
1068 		if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0)
1069 			continue;
1070 
1071 		return (rates[i].tpl_slot_id);
1072 	}
1073 
1074 	return (-1);
1075 }
1076 
1077 int
1078 stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
1079     struct statsblobv1 *src, uint32_t flags)
1080 {
1081 	int error;
1082 
1083 	error = 0;
1084 
1085 	if (src == NULL || dst == NULL ||
1086 	    src->cursz < sizeof(struct statsblob) ||
1087 	    ((flags & SB_CLONE_ALLOCDST) &&
1088 	    (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) {
1089 		error = EINVAL;
1090 	} else if (flags & SB_CLONE_ALLOCDST) {
1091 		*dst = stats_realloc(NULL, 0, src->cursz, 0);
1092 		if (*dst)
1093 			(*dst)->maxsz = dstmaxsz = src->cursz;
1094 		else
1095 			error = ENOMEM;
1096 	} else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) {
1097 		error = EINVAL;
1098 	}
1099 
1100 	if (!error) {
1101 		size_t postcurszlen;
1102 
1103 		/*
1104 		 * Clone src into dst except for the maxsz field. If dst is too
1105 		 * small to hold all of src, only copy src's header and return
1106 		 * EOVERFLOW.
1107 		 */
1108 #ifdef _KERNEL
1109 		if (flags & SB_CLONE_USRDSTNOFAULT)
1110 			error = copyout_nofault(src, *dst,
1111 			    offsetof(struct statsblob, maxsz));
1112 		else if (flags & SB_CLONE_USRDST)
1113 			error = copyout(src, *dst,
1114 			    offsetof(struct statsblob, maxsz));
1115 		else
1116 #endif
1117 			memcpy(*dst, src, offsetof(struct statsblob, maxsz));
1118 #ifdef _KERNEL
1119 		if (error != 0)
1120 			goto out;
1121 #endif
1122 
1123 
1124 		if (dstmaxsz >= src->cursz) {
1125 			postcurszlen = src->cursz -
1126 			    offsetof(struct statsblob, cursz);
1127 		} else {
1128 			error = EOVERFLOW;
1129 			postcurszlen = sizeof(struct statsblob) -
1130 			    offsetof(struct statsblob, cursz);
1131 		}
1132 #ifdef _KERNEL
1133 		if (flags & SB_CLONE_USRDSTNOFAULT)
1134 			error = copyout_nofault(&(src->cursz), &((*dst)->cursz),
1135 			    postcurszlen);
1136 		else if (flags & SB_CLONE_USRDST)
1137 			error = copyout(&(src->cursz), &((*dst)->cursz),
1138 			    postcurszlen);
1139 		else
1140 #endif
1141 			memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen);
1142 	}
1143 #ifdef _KERNEL
1144 out:
1145 #endif
1146 
1147 	return (error);
1148 }
1149 
1150 int
1151 stats_v1_tpl_alloc(const char *name, uint32_t flags __unused)
1152 {
1153 	struct statsblobv1_tpl *tpl, **newtpllist;
1154 	struct statsblobv1 *tpl_sb;
1155 	struct metablob *tpl_mb;
1156 	int tpl_id;
1157 
1158 	if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN)
1159 		return (-EINVAL);
1160 
1161 	if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0)
1162 		return (-EEXIST);
1163 
1164 	tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO);
1165 	tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO);
1166 	tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO);
1167 
1168 	if (tpl_mb != NULL && name != NULL)
1169 		tpl_mb->tplname = stats_strdup(name, 0);
1170 
1171 	if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL ||
1172 	    tpl_mb->tplname == NULL) {
1173 		stats_free(tpl);
1174 		stats_free(tpl_sb);
1175 		if (tpl_mb != NULL) {
1176 			stats_free(tpl_mb->tplname);
1177 			stats_free(tpl_mb);
1178 		}
1179 		return (-ENOMEM);
1180 	}
1181 
1182 	tpl->mb = tpl_mb;
1183 	tpl->sb = tpl_sb;
1184 
1185 	tpl_sb->abi = STATS_ABI_V1;
1186 	tpl_sb->endian =
1187 #if BYTE_ORDER == LITTLE_ENDIAN
1188 	    SB_LE;
1189 #elif BYTE_ORDER == BIG_ENDIAN
1190 	    SB_BE;
1191 #else
1192 	    SB_UE;
1193 #endif
1194 	tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1);
1195 	tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1);
1196 
1197 	TPL_LIST_WLOCK();
1198 	newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *),
1199 	    (ntpl + 1) * sizeof(void *), 0);
1200 	if (newtpllist != NULL) {
1201 		tpl_id = ntpl++;
1202 		tpllist = (struct statsblob_tpl **)newtpllist;
1203 		tpllist[tpl_id] = (struct statsblob_tpl *)tpl;
1204 		stats_tpl_update_hash(tpllist[tpl_id]);
1205 	} else {
1206 		stats_free(tpl);
1207 		stats_free(tpl_sb);
1208 		if (tpl_mb != NULL) {
1209 			stats_free(tpl_mb->tplname);
1210 			stats_free(tpl_mb);
1211 		}
1212 		tpl_id = -ENOMEM;
1213 	}
1214 	TPL_LIST_WUNLOCK();
1215 
1216 	return (tpl_id);
1217 }
1218 
1219 int
1220 stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
1221     enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
1222     uint32_t flags)
1223 {
1224 	struct voi *voi;
1225 	struct voistat *tmpstat;
1226 	struct statsblobv1 *tpl_sb;
1227 	struct metablob *tpl_mb;
1228 	int error, i, newstatdataidx, newvoibytes, newvoistatbytes,
1229 	    newvoistatdatabytes, newvoistatmaxid;
1230 	uint32_t nbytes;
1231 
1232 	if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES ||
1233 	    nvss == 0 || vss == NULL)
1234 		return (EINVAL);
1235 
1236 	error = nbytes = newvoibytes = newvoistatbytes =
1237 	    newvoistatdatabytes = 0;
1238 	newvoistatmaxid = -1;
1239 
1240 	/* Calculate the number of bytes required for the new voistats. */
1241 	for (i = nvss - 1; i >= 0; i--) {
1242 		if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES ||
1243 		    vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES ||
1244 		    vss[i].iv == NULL || vss[i].vsdsz == 0)
1245 			return (EINVAL);
1246 		if ((int)vss[i].stype > newvoistatmaxid)
1247 			newvoistatmaxid = vss[i].stype;
1248 		newvoistatdatabytes += vss[i].vsdsz;
1249 	}
1250 
1251 	if (flags & SB_VOI_RELUPDATE) {
1252 		/* XXXLAS: VOI state bytes may need to vary based on stat types. */
1253 		newvoistatdatabytes += sizeof(struct voistatdata_voistate);
1254 	}
1255 	nbytes += newvoistatdatabytes;
1256 
1257 	TPL_LIST_WLOCK();
1258 	if (tpl_id < ntpl) {
1259 		tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb;
1260 		tpl_mb = tpllist[tpl_id]->mb;
1261 
1262 		if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) {
1263 			/* Adding a new VOI and associated stats. */
1264 			if (voi_id >= NVOIS(tpl_sb)) {
1265 				/* We need to grow the tpl_sb->vois array. */
1266 				newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) *
1267 				    sizeof(struct voi);
1268 				nbytes += newvoibytes;
1269 			}
1270 			newvoistatbytes =
1271 			    (newvoistatmaxid + 1) * sizeof(struct voistat);
1272 		} else {
1273 			/* Adding stats to an existing VOI. */
1274 			if (newvoistatmaxid >
1275 			    tpl_sb->vois[voi_id].voistatmaxid) {
1276 				newvoistatbytes = (newvoistatmaxid -
1277 				    tpl_sb->vois[voi_id].voistatmaxid) *
1278 				    sizeof(struct voistat);
1279 			}
1280 			/* XXXLAS: KPI does not yet support expanding VOIs. */
1281 			error = EOPNOTSUPP;
1282 		}
1283 		nbytes += newvoistatbytes;
1284 
1285 		if (!error && newvoibytes > 0) {
1286 			struct voi_meta *voi_meta = tpl_mb->voi_meta;
1287 
1288 			voi_meta = stats_realloc(voi_meta, voi_meta == NULL ?
1289 			    0 : NVOIS(tpl_sb) * sizeof(struct voi_meta),
1290 			    (1 + voi_id) * sizeof(struct voi_meta),
1291 			    M_ZERO);
1292 
1293 			if (voi_meta == NULL)
1294 				error = ENOMEM;
1295 			else
1296 				tpl_mb->voi_meta = voi_meta;
1297 		}
1298 
1299 		if (!error) {
1300 			/* NB: Resizing can change where tpl_sb points. */
1301 			error = stats_v1_blob_expand(&tpl_sb, newvoibytes,
1302 			    newvoistatbytes, newvoistatdatabytes);
1303 		}
1304 
1305 		if (!error) {
1306 			tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name,
1307 			    0);
1308 			if (tpl_mb->voi_meta[voi_id].name == NULL)
1309 				error = ENOMEM;
1310 		}
1311 
1312 		if (!error) {
1313 			/* Update the template list with the resized pointer. */
1314 			tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb;
1315 
1316 			/* Update the template. */
1317 			voi = &tpl_sb->vois[voi_id];
1318 
1319 			if (voi->id < 0) {
1320 				/* VOI is new and needs to be initialised. */
1321 				voi->id = voi_id;
1322 				voi->dtype = voi_dtype;
1323 				voi->stats_off = tpl_sb->stats_off;
1324 				if (flags & SB_VOI_RELUPDATE)
1325 					voi->flags |= VOI_REQSTATE;
1326 			} else {
1327 				/*
1328 				 * XXXLAS: When this else block is written, the
1329 				 * "KPI does not yet support expanding VOIs"
1330 				 * error earlier in this function can be
1331 				 * removed. What is required here is to shuffle
1332 				 * the voistat array such that the new stats for
1333 				 * the voi are contiguous, which will displace
1334 				 * stats for other vois that reside after the
1335 				 * voi being updated. The other vois then need
1336 				 * to have their stats_off adjusted post
1337 				 * shuffle.
1338 				 */
1339 			}
1340 
1341 			voi->voistatmaxid = newvoistatmaxid;
1342 			newstatdataidx = 0;
1343 
1344 			if (voi->flags & VOI_REQSTATE) {
1345 				/* Initialise the voistate stat in slot 0. */
1346 				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off);
1347 				tmpstat->stype = VS_STYPE_VOISTATE;
1348 				tmpstat->flags = 0;
1349 				tmpstat->dtype = VSD_DTYPE_VOISTATE;
1350 				newstatdataidx = tmpstat->dsz =
1351 				    sizeof(struct voistatdata_numeric);
1352 				tmpstat->data_off = tpl_sb->statsdata_off;
1353 			}
1354 
1355 			for (i = 0; (uint32_t)i < nvss; i++) {
1356 				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off +
1357 				    (vss[i].stype * sizeof(struct voistat)));
1358 				KASSERT(tmpstat->stype < 0, ("voistat %p "
1359 				    "already initialised", tmpstat));
1360 				tmpstat->stype = vss[i].stype;
1361 				tmpstat->flags = vss[i].flags;
1362 				tmpstat->dtype = vss[i].vs_dtype;
1363 				tmpstat->dsz = vss[i].vsdsz;
1364 				tmpstat->data_off = tpl_sb->statsdata_off +
1365 				    newstatdataidx;
1366 				memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off),
1367 				    vss[i].iv, vss[i].vsdsz);
1368 				newstatdataidx += vss[i].vsdsz;
1369 			}
1370 
1371 			/* Update the template version hash. */
1372 			stats_tpl_update_hash(tpllist[tpl_id]);
1373 			/* XXXLAS: Confirm tpl name/hash pair remains unique. */
1374 		}
1375 	} else
1376 		error = EINVAL;
1377 	TPL_LIST_WUNLOCK();
1378 
1379 	return (error);
1380 }
1381 
1382 struct statsblobv1 *
1383 stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused)
1384 {
1385 	struct statsblobv1 *sb;
1386 	int error;
1387 
1388 	sb = NULL;
1389 
1390 	TPL_LIST_RLOCK();
1391 	if (tpl_id < ntpl) {
1392 		sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0);
1393 		if (sb != NULL) {
1394 			sb->maxsz = tpllist[tpl_id]->sb->maxsz;
1395 			error = stats_v1_blob_init_locked(sb, tpl_id, 0);
1396 		} else
1397 			error = ENOMEM;
1398 
1399 		if (error) {
1400 			stats_free(sb);
1401 			sb = NULL;
1402 		}
1403 	}
1404 	TPL_LIST_RUNLOCK();
1405 
1406 	return (sb);
1407 }
1408 
/*
 * Release the stats blob sb by handing it to stats_free().
 */
void
stats_v1_blob_destroy(struct statsblobv1 *sb)
{

	stats_free(sb);
}
1415 
1416 int
1417 stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
1418     enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
1419     size_t *retvsdsz)
1420 {
1421 	struct voi *v;
1422 	struct voistat *vs;
1423 
1424 	if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 ||
1425 	    voi_id >= NVOIS(sb))
1426 		return (EINVAL);
1427 
1428 	v = &sb->vois[voi_id];
1429 	if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid)
1430 		return (EINVAL);
1431 
1432 	vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat)));
1433 	*retvsd = BLOB_OFFSET(sb, vs->data_off);
1434 	if (retdtype != NULL)
1435 		*retdtype = vs->dtype;
1436 	if (retvsdsz != NULL)
1437 		*retvsdsz = vs->dsz;
1438 
1439 	return (0);
1440 }
1441 
1442 int
1443 stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags)
1444 {
1445 	int error;
1446 
1447 	error = 0;
1448 
1449 	TPL_LIST_RLOCK();
1450 	if (sb == NULL || tpl_id >= ntpl) {
1451 		error = EINVAL;
1452 	} else {
1453 		error = stats_v1_blob_init_locked(sb, tpl_id, flags);
1454 	}
1455 	TPL_LIST_RUNLOCK();
1456 
1457 	return (error);
1458 }
1459 
1460 static inline int
1461 stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
1462     uint32_t flags __unused)
1463 {
1464 	int error;
1465 
1466 	TPL_LIST_RLOCK_ASSERT();
1467 	error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW;
1468 	KASSERT(!error,
1469 	    ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz));
1470 
1471 	if (!error) {
1472 		memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz);
1473 		sb->created = sb->lastrst = stats_sbinuptime();
1474 		sb->tplhash = tpllist[tpl_id]->mb->tplhash;
1475 	}
1476 
1477 	return (error);
1478 }
1479 
1480 static int
1481 stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
1482     int newvoistatbytes, int newvoistatdatabytes)
1483 {
1484 	struct statsblobv1 *sb;
1485 	struct voi *tmpvoi;
1486 	struct voistat *tmpvoistat, *voistat_array;
1487 	int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats;
1488 
1489 	KASSERT(newvoibytes % sizeof(struct voi) == 0,
1490 	    ("Bad newvoibytes %d", newvoibytes));
1491 	KASSERT(newvoistatbytes % sizeof(struct voistat) == 0,
1492 	    ("Bad newvoistatbytes %d", newvoistatbytes));
1493 
1494 	error = ((newvoibytes % sizeof(struct voi) == 0) &&
1495 	    (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL;
1496 	sb = *sbpp;
1497 	nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes;
1498 
1499 	/*
1500 	 * XXXLAS: Required until we gain support for flags which alter the
1501 	 * units of size/offset fields in key structs.
1502 	 */
1503 	if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ))
1504 		error = EFBIG;
1505 
1506 	if (!error && (sb->cursz + nbytes > sb->maxsz)) {
1507 		/* Need to expand our blob. */
1508 		sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO);
1509 		if (sb != NULL) {
1510 			sb->maxsz = sb->cursz + nbytes;
1511 			*sbpp = sb;
1512 		} else
1513 		    error = ENOMEM;
1514 	}
1515 
1516 	if (!error) {
1517 		/*
1518 		 * Shuffle memory within the expanded blob working from the end
1519 		 * backwards, leaving gaps for the new voistat and voistatdata
1520 		 * structs at the beginning of their respective blob regions,
1521 		 * and for the new voi structs at the end of their blob region.
1522 		 */
1523 		memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes),
1524 		    BLOB_OFFSET(sb, sb->statsdata_off),
1525 		    sb->cursz - sb->statsdata_off);
1526 		memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes +
1527 		    newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off),
1528 		    sb->statsdata_off - sb->stats_off);
1529 
1530 		/* First index of new voi/voistat structs to be initialised. */
1531 		idxnewvois = NVOIS(sb);
1532 		idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1;
1533 
1534 		/* Update housekeeping variables and offsets. */
1535 		sb->cursz += nbytes;
1536 		sb->stats_off += newvoibytes;
1537 		sb->statsdata_off += newvoibytes + newvoistatbytes;
1538 
1539 		/* XXXLAS: Zeroing not strictly needed but aids debugging. */
1540 		memset(&sb->vois[idxnewvois], '\0', newvoibytes);
1541 		memset(BLOB_OFFSET(sb, sb->stats_off), '\0',
1542 		    newvoistatbytes);
1543 		memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0',
1544 		    newvoistatdatabytes);
1545 
1546 		/* Initialise new voi array members and update offsets. */
1547 		for (i = 0; i < NVOIS(sb); i++) {
1548 			tmpvoi = &sb->vois[i];
1549 			if (i >= idxnewvois) {
1550 				tmpvoi->id = tmpvoi->voistatmaxid = -1;
1551 			} else if (tmpvoi->id > -1) {
1552 				tmpvoi->stats_off += newvoibytes +
1553 				    newvoistatbytes;
1554 			}
1555 		}
1556 
1557 		/* Initialise new voistat array members and update offsets. */
1558 		nvoistats = (sb->statsdata_off - sb->stats_off) /
1559 		    sizeof(struct voistat);
1560 		voistat_array = BLOB_OFFSET(sb, sb->stats_off);
1561 		for (i = 0; i < nvoistats; i++) {
1562 			tmpvoistat = &voistat_array[i];
1563 			if (i <= idxnewvoistats) {
1564 				tmpvoistat->stype = -1;
1565 			} else if (tmpvoistat->stype > -1) {
1566 				tmpvoistat->data_off += nbytes;
1567 			}
1568 		}
1569 	}
1570 
1571 	return (error);
1572 }
1573 
1574 static void
1575 stats_v1_blob_finalise(struct statsblobv1 *sb __unused)
1576 {
1577 
1578 	/* XXXLAS: Fill this in. */
1579 }
1580 
1581 static void
1582 stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb,
1583     void *usrctx, uint32_t flags)
1584 {
1585 	struct voi *v;
1586 	struct voistat *vs;
1587 	struct sb_iter_ctx ctx;
1588 	int i, j, firstvoi;
1589 
1590 	ctx.usrctx = usrctx;
1591 	ctx.flags = SB_IT_FIRST_CB;
1592 	firstvoi = 1;
1593 
1594 	for (i = 0; i < NVOIS(sb); i++) {
1595 		v = &sb->vois[i];
1596 		ctx.vslot = i;
1597 		ctx.vsslot = -1;
1598 		ctx.flags |= SB_IT_FIRST_VOISTAT;
1599 
1600 		if (firstvoi)
1601 			ctx.flags |= SB_IT_FIRST_VOI;
1602 		else if (i == (NVOIS(sb) - 1))
1603 			ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB;
1604 
1605 		if (v->id < 0 && (flags & SB_IT_NULLVOI)) {
1606 			if (icb(sb, v, NULL, &ctx))
1607 				return;
1608 			firstvoi = 0;
1609 			ctx.flags &= ~SB_IT_FIRST_CB;
1610 		}
1611 
1612 		/* If NULL voi, v->voistatmaxid == -1 */
1613 		for (j = 0; j <= v->voistatmaxid; j++) {
1614 			vs = &((struct voistat *)BLOB_OFFSET(sb,
1615 			    v->stats_off))[j];
1616 			if (vs->stype < 0 &&
1617 			    !(flags & SB_IT_NULLVOISTAT))
1618 				continue;
1619 
1620 			if (j == v->voistatmaxid) {
1621 				ctx.flags |= SB_IT_LAST_VOISTAT;
1622 				if (i == (NVOIS(sb) - 1))
1623 					ctx.flags |=
1624 					    SB_IT_LAST_CB;
1625 			} else
1626 				ctx.flags &= ~SB_IT_LAST_CB;
1627 
1628 			ctx.vsslot = j;
1629 			if (icb(sb, v, vs, &ctx))
1630 				return;
1631 
1632 			ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT |
1633 			    SB_IT_LAST_VOISTAT);
1634 		}
1635 		ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI);
1636 	}
1637 }
1638 
1639 static inline void
1640 stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused,
1641     const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype,
1642     size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
1643 {
1644 	const struct ctdth32 *ctd32tree;
1645 	const struct ctdth64 *ctd64tree;
1646 	const struct voistatdata_tdgstctd32 *ctd32;
1647 	const struct voistatdata_tdgstctd64 *ctd64;
1648 	const char *fmtstr;
1649 	uint64_t smplcnt, compcnt;
1650 	int is32bit, qmaxstrlen;
1651 	uint16_t maxctds, curctds;
1652 
1653 	switch (tdgst_dtype) {
1654 	case VSD_DTYPE_TDGSTCLUST32:
1655 		smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt;
1656 		compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt;
1657 		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
1658 		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
1659 		ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree;
1660 		ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) :
1661 		    ARB_CMIN(ctdth32, ctd32tree));
1662 		qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10);
1663 		is32bit = 1;
1664 		ctd64tree = NULL;
1665 		ctd64 = NULL;
1666 		break;
1667 	case VSD_DTYPE_TDGSTCLUST64:
1668 		smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt;
1669 		compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt;
1670 		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
1671 		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
1672 		ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree;
1673 		ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) :
1674 		    ARB_CMIN(ctdth64, ctd64tree));
1675 		qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10);
1676 		is32bit = 0;
1677 		ctd32tree = NULL;
1678 		ctd32 = NULL;
1679 		break;
1680 	default:
1681 		return;
1682 	}
1683 
1684 	switch (fmt) {
1685 	case SB_STRFMT_FREEFORM:
1686 		fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu";
1687 		break;
1688 	case SB_STRFMT_JSON:
1689 	default:
1690 		fmtstr =
1691 		    "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu,"
1692 		    "\"nctds\":%hu,\"ctds\":[";
1693 		break;
1694 	}
1695 	sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt,
1696 	    maxctds, curctds);
1697 
1698 	while ((is32bit ? NULL != ctd32 : NULL != ctd64)) {
1699 		char qstr[qmaxstrlen];
1700 
1701 		switch (fmt) {
1702 		case SB_STRFMT_FREEFORM:
1703 			fmtstr = "\n\t\t\t\t";
1704 			break;
1705 		case SB_STRFMT_JSON:
1706 		default:
1707 			fmtstr = "{";
1708 			break;
1709 		}
1710 		sbuf_cat(buf, fmtstr);
1711 
1712 		if (objdump) {
1713 			switch (fmt) {
1714 			case SB_STRFMT_FREEFORM:
1715 				fmtstr = "ctd[%hu].";
1716 				break;
1717 			case SB_STRFMT_JSON:
1718 			default:
1719 				fmtstr = "\"ctd\":%hu,";
1720 				break;
1721 			}
1722 			sbuf_printf(buf, fmtstr, is32bit ?
1723 			    ARB_SELFIDX(ctd32tree, ctd32) :
1724 			    ARB_SELFIDX(ctd64tree, ctd64));
1725 		}
1726 
1727 		switch (fmt) {
1728 		case SB_STRFMT_FREEFORM:
1729 			fmtstr = "{mu=";
1730 			break;
1731 		case SB_STRFMT_JSON:
1732 		default:
1733 			fmtstr = "\"mu\":";
1734 			break;
1735 		}
1736 		sbuf_cat(buf, fmtstr);
1737 		Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr,
1738 		    sizeof(qstr));
1739 		sbuf_cat(buf, qstr);
1740 
1741 		switch (fmt) {
1742 		case SB_STRFMT_FREEFORM:
1743 			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
1744 			break;
1745 		case SB_STRFMT_JSON:
1746 		default:
1747 			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
1748 			break;
1749 		}
1750 		sbuf_printf(buf, fmtstr,
1751 		    is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt);
1752 
1753 		if (is32bit)
1754 			ctd32 = (objdump ? ARB_CNODE(ctd32tree,
1755 			    ARB_SELFIDX(ctd32tree, ctd32) + 1) :
1756 			    ARB_CNEXT(ctdth32, ctd32tree, ctd32));
1757 		else
1758 			ctd64 = (objdump ? ARB_CNODE(ctd64tree,
1759 			    ARB_SELFIDX(ctd64tree, ctd64) + 1) :
1760 			    ARB_CNEXT(ctdth64, ctd64tree, ctd64));
1761 
1762 		if (fmt == SB_STRFMT_JSON &&
1763 		    (is32bit ? NULL != ctd32 : NULL != ctd64))
1764 			sbuf_putc(buf, ',');
1765 	}
1766 	if (fmt == SB_STRFMT_JSON)
1767 		sbuf_cat(buf, "]");
1768 }
1769 
1770 static inline void
1771 stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype,
1772     const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype,
1773     size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
1774 {
1775 	const struct voistatdata_numeric *bkt_lb, *bkt_ub;
1776 	const char *fmtstr;
1777 	int is32bit;
1778 	uint16_t i, nbkts;
1779 
1780 	switch (hist_dtype) {
1781 	case VSD_DTYPE_CRHIST32:
1782 		nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz);
1783 		is32bit = 1;
1784 		break;
1785 	case VSD_DTYPE_DRHIST32:
1786 		nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz);
1787 		is32bit = 1;
1788 		break;
1789 	case VSD_DTYPE_DVHIST32:
1790 		nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz);
1791 		is32bit = 1;
1792 		break;
1793 	case VSD_DTYPE_CRHIST64:
1794 		nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz);
1795 		is32bit = 0;
1796 		break;
1797 	case VSD_DTYPE_DRHIST64:
1798 		nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz);
1799 		is32bit = 0;
1800 		break;
1801 	case VSD_DTYPE_DVHIST64:
1802 		nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz);
1803 		is32bit = 0;
1804 		break;
1805 	default:
1806 		return;
1807 	}
1808 
1809 	switch (fmt) {
1810 	case SB_STRFMT_FREEFORM:
1811 		fmtstr = "nbkts=%hu, ";
1812 		break;
1813 	case SB_STRFMT_JSON:
1814 	default:
1815 		fmtstr = "\"nbkts\":%hu,";
1816 		break;
1817 	}
1818 	sbuf_printf(buf, fmtstr, nbkts);
1819 
1820 	switch (fmt) {
1821 		case SB_STRFMT_FREEFORM:
1822 			fmtstr = (is32bit ? "oob=%u" : "oob=%ju");
1823 			break;
1824 		case SB_STRFMT_JSON:
1825 		default:
1826 			fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" :
1827 			    "\"oob\":%ju,\"bkts\":[");
1828 			break;
1829 	}
1830 	sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist,
1831 	    hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist,
1832 	    hist_dtype, oob));
1833 
1834 	for (i = 0; i < nbkts; i++) {
1835 		switch (hist_dtype) {
1836 		case VSD_DTYPE_CRHIST32:
1837 		case VSD_DTYPE_CRHIST64:
1838 			bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype,
1839 			    bkts[i].lb);
1840 			if (i < nbkts - 1)
1841 				bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist,
1842 				    hist_dtype, bkts[i + 1].lb);
1843 			else
1844 				bkt_ub = &numeric_limits[LIM_MAX][voi_dtype];
1845 			break;
1846 		case VSD_DTYPE_DRHIST32:
1847 		case VSD_DTYPE_DRHIST64:
1848 			bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
1849 			    bkts[i].lb);
1850 			bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
1851 			    bkts[i].ub);
1852 			break;
1853 		case VSD_DTYPE_DVHIST32:
1854 		case VSD_DTYPE_DVHIST64:
1855 			bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist,
1856 			    hist_dtype, bkts[i].val);
1857 			break;
1858 		default:
1859 			break;
1860 		}
1861 
1862 		switch (fmt) {
1863 		case SB_STRFMT_FREEFORM:
1864 			fmtstr = "\n\t\t\t\t";
1865 			break;
1866 		case SB_STRFMT_JSON:
1867 		default:
1868 			fmtstr = "{";
1869 			break;
1870 		}
1871 		sbuf_cat(buf, fmtstr);
1872 
1873 		if (objdump) {
1874 			switch (fmt) {
1875 			case SB_STRFMT_FREEFORM:
1876 				fmtstr = "bkt[%hu].";
1877 				break;
1878 			case SB_STRFMT_JSON:
1879 			default:
1880 				fmtstr = "\"bkt\":%hu,";
1881 				break;
1882 			}
1883 			sbuf_printf(buf, fmtstr, i);
1884 		}
1885 
1886 		switch (fmt) {
1887 		case SB_STRFMT_FREEFORM:
1888 			fmtstr = "{lb=";
1889 			break;
1890 		case SB_STRFMT_JSON:
1891 		default:
1892 			fmtstr = "\"lb\":";
1893 			break;
1894 		}
1895 		sbuf_cat(buf, fmtstr);
1896 		stats_voistatdata_tostr((const struct voistatdata *)bkt_lb,
1897 		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
1898 		    fmt, buf, objdump);
1899 
1900 		switch (fmt) {
1901 		case SB_STRFMT_FREEFORM:
1902 			fmtstr = ",ub=";
1903 			break;
1904 		case SB_STRFMT_JSON:
1905 		default:
1906 			fmtstr = ",\"ub\":";
1907 			break;
1908 		}
1909 		sbuf_cat(buf, fmtstr);
1910 		stats_voistatdata_tostr((const struct voistatdata *)bkt_ub,
1911 		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
1912 		    fmt, buf, objdump);
1913 
1914 		switch (fmt) {
1915 		case SB_STRFMT_FREEFORM:
1916 			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
1917 			break;
1918 		case SB_STRFMT_JSON:
1919 		default:
1920 			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
1921 			break;
1922 		}
1923 		sbuf_printf(buf, fmtstr, is32bit ?
1924 		    VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) :
1925 		    (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype,
1926 		    bkts[i].cnt));
1927 
1928 		if (fmt == SB_STRFMT_JSON && i < nbkts - 1)
1929 			sbuf_putc(buf, ',');
1930 	}
1931 	if (fmt == SB_STRFMT_JSON)
1932 		sbuf_cat(buf, "]");
1933 }
1934 
1935 int
1936 stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype,
1937     enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt,
1938     struct sbuf *buf, int objdump)
1939 {
1940 	const char *fmtstr;
1941 
1942 	if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES ||
1943 	    vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS)
1944 		return (EINVAL);
1945 
1946 	switch (vsd_dtype) {
1947 	case VSD_DTYPE_VOISTATE:
1948 		switch (fmt) {
1949 		case SB_STRFMT_FREEFORM:
1950 			fmtstr = "prev=";
1951 			break;
1952 		case SB_STRFMT_JSON:
1953 		default:
1954 			fmtstr = "\"prev\":";
1955 			break;
1956 		}
1957 		sbuf_cat(buf, fmtstr);
1958 		/*
1959 		 * Render prev by passing it as *vsd and voi_dtype as vsd_dtype.
1960 		 */
1961 		stats_voistatdata_tostr(
1962 		    (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev,
1963 		    voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump);
1964 		break;
1965 	case VSD_DTYPE_INT_S32:
1966 		sbuf_printf(buf, "%d", vsd->int32.s32);
1967 		break;
1968 	case VSD_DTYPE_INT_U32:
1969 		sbuf_printf(buf, "%u", vsd->int32.u32);
1970 		break;
1971 	case VSD_DTYPE_INT_S64:
1972 		sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64);
1973 		break;
1974 	case VSD_DTYPE_INT_U64:
1975 		sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64);
1976 		break;
1977 	case VSD_DTYPE_INT_SLONG:
1978 		sbuf_printf(buf, "%ld", vsd->intlong.slong);
1979 		break;
1980 	case VSD_DTYPE_INT_ULONG:
1981 		sbuf_printf(buf, "%lu", vsd->intlong.ulong);
1982 		break;
1983 	case VSD_DTYPE_Q_S32:
1984 		{
1985 		char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)];
1986 		Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr));
1987 		sbuf_cat(buf, qstr);
1988 		}
1989 		break;
1990 	case VSD_DTYPE_Q_U32:
1991 		{
1992 		char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)];
1993 		Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr));
1994 		sbuf_cat(buf, qstr);
1995 		}
1996 		break;
1997 	case VSD_DTYPE_Q_S64:
1998 		{
1999 		char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)];
2000 		Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr));
2001 		sbuf_cat(buf, qstr);
2002 		}
2003 		break;
2004 	case VSD_DTYPE_Q_U64:
2005 		{
2006 		char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)];
2007 		Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr));
2008 		sbuf_cat(buf, qstr);
2009 		}
2010 		break;
2011 	case VSD_DTYPE_CRHIST32:
2012 	case VSD_DTYPE_DRHIST32:
2013 	case VSD_DTYPE_DVHIST32:
2014 	case VSD_DTYPE_CRHIST64:
2015 	case VSD_DTYPE_DRHIST64:
2016 	case VSD_DTYPE_DVHIST64:
2017 		stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd),
2018 		    vsd_dtype, vsd_sz, fmt, buf, objdump);
2019 		break;
2020 	case VSD_DTYPE_TDGSTCLUST32:
2021 	case VSD_DTYPE_TDGSTCLUST64:
2022 		stats_voistatdata_tdgst_tostr(voi_dtype,
2023 		    CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf,
2024 		    objdump);
2025 		break;
2026 	default:
2027 		break;
2028 	}
2029 
2030 	return (sbuf_error(buf));
2031 }
2032 
2033 static void
2034 stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v,
2035     struct voistat *vs, struct sb_iter_ctx *ctx)
2036 {
2037 	struct sb_tostrcb_ctx *sctx;
2038 	struct metablob *tpl_mb;
2039 	struct sbuf *buf;
2040 	void *vsd;
2041 	uint8_t dump;
2042 
2043 	sctx = ctx->usrctx;
2044 	buf = sctx->buf;
2045 	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2046 	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2047 
2048 	if (ctx->flags & SB_IT_FIRST_CB) {
2049 		sbuf_printf(buf, "struct statsblobv1@%p", sb);
2050 		if (dump) {
2051 			sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, "
2052 			    "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, "
2053 			    "stats_off=%hu, statsdata_off=%hu",
2054 			    sb->abi, sb->endian, sb->maxsz, sb->cursz,
2055 			    sb->created, sb->lastrst, sb->flags, sb->stats_off,
2056 			    sb->statsdata_off);
2057 		}
2058 		sbuf_printf(buf, ", tplhash=%u", sb->tplhash);
2059 	}
2060 
2061 	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2062 		sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id);
2063 		if (v->id < 0)
2064 			return;
2065 		sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? "" :
2066 		    tpl_mb->voi_meta[v->id].name);
2067 		if (dump)
2068 		    sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, "
2069 		    "voistatmaxid=%hhd, stats_off=%hu", v->flags,
2070 		    vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off);
2071 	}
2072 
2073 	if (!dump && vs->stype <= 0)
2074 		return;
2075 
2076 	sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot);
2077 	if (vs->stype < 0) {
2078 		sbuf_printf(buf, "%hhd", vs->stype);
2079 		return;
2080 	} else
2081 		sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype],
2082 		    vs->errs);
2083 	vsd = BLOB_OFFSET(sb, vs->data_off);
2084 	if (dump)
2085 		sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, "
2086 		    "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype],
2087 		    vs->dsz, vs->data_off);
2088 
2089 	sbuf_cat(buf, "\n\t\t\tvoistatdata: ");
2090 	stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2091 	    sctx->fmt, buf, dump);
2092 }
2093 
2094 static void
2095 stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2096     struct sb_iter_ctx *ctx)
2097 {
2098 	struct sb_tostrcb_ctx *sctx;
2099 	struct metablob *tpl_mb;
2100 	struct sbuf *buf;
2101 	const char *fmtstr;
2102 	void *vsd;
2103 	uint8_t dump;
2104 
2105 	sctx = ctx->usrctx;
2106 	buf = sctx->buf;
2107 	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2108 	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2109 
2110 	if (ctx->flags & SB_IT_FIRST_CB) {
2111 		sbuf_putc(buf, '{');
2112 		if (dump) {
2113 			sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu,"
2114 			    "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd,"
2115 			    "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu,"
2116 			    "\"statsdata_off\":%hu,", sb->abi,
2117 			    sb->endian, sb->maxsz, sb->cursz, sb->created,
2118 			    sb->lastrst, sb->flags, sb->stats_off,
2119 			    sb->statsdata_off);
2120 		}
2121 
2122 		if (tpl_mb == NULL)
2123 			fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{";
2124 		else
2125 			fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{";
2126 
2127 		sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null",
2128 		    sb->tplhash);
2129 	}
2130 
2131 	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2132 		if (dump) {
2133 			sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot,
2134 			    v->id);
2135 			if (v->id < 0) {
2136 				sbuf_cat(buf, "},");
2137 				return;
2138 			}
2139 
2140 			if (tpl_mb == NULL)
2141 				fmtstr = ",\"name\":%s,\"flags\":%hu,"
2142 				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
2143 				    "\"stats_off\":%hu,";
2144 			else
2145 				fmtstr = ",\"name\":\"%s\",\"flags\":%hu,"
2146 				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
2147 				    "\"stats_off\":%hu,";
2148 
2149 			sbuf_printf(buf, fmtstr, tpl_mb ?
2150 			    tpl_mb->voi_meta[v->id].name : "null", v->flags,
2151 			    vsd_dtype2name[v->dtype], v->voistatmaxid,
2152 			    v->stats_off);
2153 		} else {
2154 			if (tpl_mb == NULL) {
2155 				sbuf_printf(buf, "\"[%hd]\":{", v->id);
2156 			} else {
2157 				sbuf_printf(buf, "\"%s\":{",
2158 				    tpl_mb->voi_meta[v->id].name);
2159 			}
2160 		}
2161 		sbuf_cat(buf, "\"stats\":{");
2162 	}
2163 
2164 	vsd = BLOB_OFFSET(sb, vs->data_off);
2165 	if (dump) {
2166 		sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot);
2167 		if (vs->stype < 0) {
2168 			sbuf_cat(buf, "{\"stype\":-1},");
2169 			return;
2170 		}
2171 		sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu,"
2172 		    "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{",
2173 		    vs_stype2name[vs->stype], vs->errs, vs->flags,
2174 		    vsd_dtype2name[vs->dtype], vs->data_off);
2175 	} else if (vs->stype > 0) {
2176 		if (tpl_mb == NULL)
2177 			sbuf_printf(buf, "\"[%hhd]\":", vs->stype);
2178 		else
2179 			sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]);
2180 	} else
2181 		return;
2182 
2183 	if ((vs->flags & VS_VSDVALID) || dump) {
2184 		if (!dump)
2185 			sbuf_printf(buf, "{\"errs\":%hu,", vs->errs);
2186 		/* Simple non-compound VSD types need a key. */
2187 		if (!vsd_compoundtype[vs->dtype])
2188 			sbuf_cat(buf, "\"val\":");
2189 		stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2190 		    sctx->fmt, buf, dump);
2191 		sbuf_cat(buf, dump ? "}}" : "}");
2192 	} else
2193 		sbuf_cat(buf, dump ? "null}" : "null");
2194 
2195 	if (ctx->flags & SB_IT_LAST_VOISTAT)
2196 		sbuf_cat(buf, "}}");
2197 
2198 	if (ctx->flags & SB_IT_LAST_CB)
2199 		sbuf_cat(buf, "}}");
2200 	else
2201 		sbuf_putc(buf, ',');
2202 }
2203 
2204 static int
2205 stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2206     struct sb_iter_ctx *ctx)
2207 {
2208 	struct sb_tostrcb_ctx *sctx;
2209 
2210 	sctx = ctx->usrctx;
2211 
2212 	switch (sctx->fmt) {
2213 	case SB_STRFMT_FREEFORM:
2214 		stats_v1_itercb_tostr_freeform(sb, v, vs, ctx);
2215 		break;
2216 	case SB_STRFMT_JSON:
2217 		stats_v1_itercb_tostr_json(sb, v, vs, ctx);
2218 		break;
2219 	default:
2220 		break;
2221 	}
2222 
2223 	return (sbuf_error(sctx->buf));
2224 }
2225 
2226 int
2227 stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
2228     enum sb_str_fmt fmt, uint32_t flags)
2229 {
2230 	struct sb_tostrcb_ctx sctx;
2231 	uint32_t iflags;
2232 
2233 	if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL ||
2234 	    fmt >= SB_STRFMT_NUM_FMTS)
2235 		return (EINVAL);
2236 
2237 	sctx.buf = buf;
2238 	sctx.fmt = fmt;
2239 	sctx.flags = flags;
2240 
2241 	if (flags & SB_TOSTR_META) {
2242 		if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash),
2243 		    &sctx.tpl))
2244 			return (EINVAL);
2245 	} else
2246 		sctx.tpl = NULL;
2247 
2248 	iflags = 0;
2249 	if (flags & SB_TOSTR_OBJDUMP)
2250 		iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT);
2251 	stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags);
2252 
2253 	return (sbuf_error(buf));
2254 }
2255 
2256 static int
2257 stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v,
2258     struct voistat *vs, struct sb_iter_ctx *ctx)
2259 {
2260 	struct sb_visitcb_ctx *vctx;
2261 	struct sb_visit sbv;
2262 
2263 	vctx = ctx->usrctx;
2264 
2265 	sbv.tplhash = sb->tplhash;
2266 	sbv.voi_id = v->id;
2267 	sbv.voi_dtype = v->dtype;
2268 	sbv.vs_stype = vs->stype;
2269 	sbv.vs_dtype = vs->dtype;
2270 	sbv.vs_dsz = vs->dsz;
2271 	sbv.vs_data = BLOB_OFFSET(sb, vs->data_off);
2272 	sbv.vs_errs = vs->errs;
2273 	sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB |
2274 	    SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
2275 	    SB_IT_LAST_VOISTAT);
2276 
2277 	return (vctx->cb(&sbv, vctx->usrctx));
2278 }
2279 
2280 int
2281 stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
2282     void *usrctx)
2283 {
2284 	struct sb_visitcb_ctx vctx;
2285 
2286 	if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL)
2287 		return (EINVAL);
2288 
2289 	vctx.cb = func;
2290 	vctx.usrctx = usrctx;
2291 
2292 	stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0);
2293 
2294 	return (0);
2295 }
2296 
2297 static int
2298 stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused,
2299     struct voistat *vs, struct sb_iter_ctx *ctx __unused)
2300 {
2301 	void *vsd;
2302 
2303 	if (vs->stype == VS_STYPE_VOISTATE)
2304 		return (0);
2305 
2306 	vsd = BLOB_OFFSET(sb, vs->data_off);
2307 
2308 	/* Perform the stat type's default reset action. */
2309 	switch (vs->stype) {
2310 	case VS_STYPE_SUM:
2311 		switch (vs->dtype) {
2312 		case VSD_DTYPE_Q_S32:
2313 			Q_SIFVAL(VSD(q32, vsd)->sq32, 0);
2314 			break;
2315 		case VSD_DTYPE_Q_U32:
2316 			Q_SIFVAL(VSD(q32, vsd)->uq32, 0);
2317 			break;
2318 		case VSD_DTYPE_Q_S64:
2319 			Q_SIFVAL(VSD(q64, vsd)->sq64, 0);
2320 			break;
2321 		case VSD_DTYPE_Q_U64:
2322 			Q_SIFVAL(VSD(q64, vsd)->uq64, 0);
2323 			break;
2324 		default:
2325 			bzero(vsd, vs->dsz);
2326 			break;
2327 		}
2328 		break;
2329 	case VS_STYPE_MAX:
2330 		switch (vs->dtype) {
2331 		case VSD_DTYPE_Q_S32:
2332 			Q_SIFVAL(VSD(q32, vsd)->sq32,
2333 			    Q_IFMINVAL(VSD(q32, vsd)->sq32));
2334 			break;
2335 		case VSD_DTYPE_Q_U32:
2336 			Q_SIFVAL(VSD(q32, vsd)->uq32,
2337 			    Q_IFMINVAL(VSD(q32, vsd)->uq32));
2338 			break;
2339 		case VSD_DTYPE_Q_S64:
2340 			Q_SIFVAL(VSD(q64, vsd)->sq64,
2341 			    Q_IFMINVAL(VSD(q64, vsd)->sq64));
2342 			break;
2343 		case VSD_DTYPE_Q_U64:
2344 			Q_SIFVAL(VSD(q64, vsd)->uq64,
2345 			    Q_IFMINVAL(VSD(q64, vsd)->uq64));
2346 			break;
2347 		default:
2348 			memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype],
2349 			    vs->dsz);
2350 			break;
2351 		}
2352 		break;
2353 	case VS_STYPE_MIN:
2354 		switch (vs->dtype) {
2355 		case VSD_DTYPE_Q_S32:
2356 			Q_SIFVAL(VSD(q32, vsd)->sq32,
2357 			    Q_IFMAXVAL(VSD(q32, vsd)->sq32));
2358 			break;
2359 		case VSD_DTYPE_Q_U32:
2360 			Q_SIFVAL(VSD(q32, vsd)->uq32,
2361 			    Q_IFMAXVAL(VSD(q32, vsd)->uq32));
2362 			break;
2363 		case VSD_DTYPE_Q_S64:
2364 			Q_SIFVAL(VSD(q64, vsd)->sq64,
2365 			    Q_IFMAXVAL(VSD(q64, vsd)->sq64));
2366 			break;
2367 		case VSD_DTYPE_Q_U64:
2368 			Q_SIFVAL(VSD(q64, vsd)->uq64,
2369 			    Q_IFMAXVAL(VSD(q64, vsd)->uq64));
2370 			break;
2371 		default:
2372 			memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype],
2373 			    vs->dsz);
2374 			break;
2375 		}
2376 		break;
2377 	case VS_STYPE_HIST:
2378 		{
2379 		/* Reset bucket counts. */
2380 		struct voistatdata_hist *hist;
2381 		int i, is32bit;
2382 		uint16_t nbkts;
2383 
2384 		hist = VSD(hist, vsd);
2385 		switch (vs->dtype) {
2386 		case VSD_DTYPE_CRHIST32:
2387 			nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
2388 			is32bit = 1;
2389 			break;
2390 		case VSD_DTYPE_DRHIST32:
2391 			nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
2392 			is32bit = 1;
2393 			break;
2394 		case VSD_DTYPE_DVHIST32:
2395 			nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
2396 			is32bit = 1;
2397 			break;
2398 		case VSD_DTYPE_CRHIST64:
2399 			nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
2400 			is32bit = 0;
2401 			break;
2402 		case VSD_DTYPE_DRHIST64:
2403 			nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
2404 			is32bit = 0;
2405 			break;
2406 		case VSD_DTYPE_DVHIST64:
2407 			nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
2408 			is32bit = 0;
2409 			break;
2410 		default:
2411 			return (0);
2412 		}
2413 
2414 		bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob),
2415 		    is32bit ? sizeof(uint32_t) : sizeof(uint64_t));
2416 		for (i = nbkts - 1; i >= 0; i--) {
2417 			bzero(VSD_HIST_FIELDPTR(hist, vs->dtype,
2418 			    bkts[i].cnt), is32bit ? sizeof(uint32_t) :
2419 			    sizeof(uint64_t));
2420 		}
2421 		break;
2422 		}
2423 	case VS_STYPE_TDGST:
2424 		{
2425 		/* Reset sample count centroids array/tree. */
2426 		struct voistatdata_tdgst *tdgst;
2427 		struct ctdth32 *ctd32tree;
2428 		struct ctdth64 *ctd64tree;
2429 		struct voistatdata_tdgstctd32 *ctd32;
2430 		struct voistatdata_tdgstctd64 *ctd64;
2431 
2432 		tdgst = VSD(tdgst, vsd);
2433 		switch (vs->dtype) {
2434 		case VSD_DTYPE_TDGSTCLUST32:
2435 			VSD(tdgstclust32, tdgst)->smplcnt = 0;
2436 			VSD(tdgstclust32, tdgst)->compcnt = 0;
2437 			ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
2438 			ARB_INIT(ctd32, ctdlnk, ctd32tree,
2439 			    ARB_MAXNODES(ctd32tree)) {
2440 				ctd32->cnt = 0;
2441 				Q_SIFVAL(ctd32->mu, 0);
2442 			}
2443 #ifdef DIAGNOSTIC
2444 			RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
2445 #endif
2446 		break;
2447 		case VSD_DTYPE_TDGSTCLUST64:
2448 			VSD(tdgstclust64, tdgst)->smplcnt = 0;
2449 			VSD(tdgstclust64, tdgst)->compcnt = 0;
2450 			ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
2451 			ARB_INIT(ctd64, ctdlnk, ctd64tree,
2452 			    ARB_MAXNODES(ctd64tree)) {
2453 				ctd64->cnt = 0;
2454 				Q_SIFVAL(ctd64->mu, 0);
2455 			}
2456 #ifdef DIAGNOSTIC
2457 			RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
2458 #endif
2459 		break;
2460 		default:
2461 			return (0);
2462 		}
2463 		break;
2464 		}
2465 	default:
2466 		KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
2467 		break;
2468 	}
2469 
2470 	vs->errs = 0;
2471 	vs->flags &= ~VS_VSDVALID;
2472 
2473 	return (0);
2474 }
2475 
2476 int
2477 stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
2478     struct statsblobv1 *src, uint32_t flags)
2479 {
2480 	int error;
2481 
2482 	if (src != NULL && src->abi == STATS_ABI_V1) {
2483 		error = stats_v1_blob_clone(dst, dstmaxsz, src, flags);
2484 		if (!error) {
2485 			if (flags & SB_CLONE_RSTSRC) {
2486 				stats_v1_blob_iter(src,
2487 				    stats_v1_icb_reset_voistat, NULL, 0);
2488 				src->lastrst = stats_sbinuptime();
2489 			}
2490 			stats_v1_blob_finalise(*dst);
2491 		}
2492 	} else
2493 		error = EINVAL;
2494 
2495 	return (error);
2496 }
2497 
2498 static inline int
2499 stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused,
2500     struct voistatdata *voival, struct voistat *vs, void *vsd)
2501 {
2502 	int error;
2503 
2504 	KASSERT(vs->dtype < VSD_NUM_DTYPES,
2505 	    ("Unknown VSD dtype %d", vs->dtype));
2506 
2507 	error = 0;
2508 
2509 	switch (vs->dtype) {
2510 	case VSD_DTYPE_INT_S32:
2511 		if (VSD(int32, vsd)->s32 < voival->int32.s32) {
2512 			VSD(int32, vsd)->s32 = voival->int32.s32;
2513 			vs->flags |= VS_VSDVALID;
2514 		}
2515 		break;
2516 	case VSD_DTYPE_INT_U32:
2517 		if (VSD(int32, vsd)->u32 < voival->int32.u32) {
2518 			VSD(int32, vsd)->u32 = voival->int32.u32;
2519 			vs->flags |= VS_VSDVALID;
2520 		}
2521 		break;
2522 	case VSD_DTYPE_INT_S64:
2523 		if (VSD(int64, vsd)->s64 < voival->int64.s64) {
2524 			VSD(int64, vsd)->s64 = voival->int64.s64;
2525 			vs->flags |= VS_VSDVALID;
2526 		}
2527 		break;
2528 	case VSD_DTYPE_INT_U64:
2529 		if (VSD(int64, vsd)->u64 < voival->int64.u64) {
2530 			VSD(int64, vsd)->u64 = voival->int64.u64;
2531 			vs->flags |= VS_VSDVALID;
2532 		}
2533 		break;
2534 	case VSD_DTYPE_INT_SLONG:
2535 		if (VSD(intlong, vsd)->slong < voival->intlong.slong) {
2536 			VSD(intlong, vsd)->slong = voival->intlong.slong;
2537 			vs->flags |= VS_VSDVALID;
2538 		}
2539 		break;
2540 	case VSD_DTYPE_INT_ULONG:
2541 		if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) {
2542 			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
2543 			vs->flags |= VS_VSDVALID;
2544 		}
2545 		break;
2546 	case VSD_DTYPE_Q_S32:
2547 		if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
2548 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
2549 		    voival->q32.sq32)))) {
2550 			vs->flags |= VS_VSDVALID;
2551 		}
2552 		break;
2553 	case VSD_DTYPE_Q_U32:
2554 		if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
2555 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
2556 		    voival->q32.uq32)))) {
2557 			vs->flags |= VS_VSDVALID;
2558 		}
2559 		break;
2560 	case VSD_DTYPE_Q_S64:
2561 		if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
2562 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
2563 		    voival->q64.sq64)))) {
2564 			vs->flags |= VS_VSDVALID;
2565 		}
2566 		break;
2567 	case VSD_DTYPE_Q_U64:
2568 		if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
2569 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
2570 		    voival->q64.uq64)))) {
2571 			vs->flags |= VS_VSDVALID;
2572 		}
2573 		break;
2574 	default:
2575 		error = EINVAL;
2576 		break;
2577 	}
2578 
2579 	return (error);
2580 }
2581 
2582 static inline int
2583 stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused,
2584     struct voistatdata *voival, struct voistat *vs, void *vsd)
2585 {
2586 	int error;
2587 
2588 	KASSERT(vs->dtype < VSD_NUM_DTYPES,
2589 	    ("Unknown VSD dtype %d", vs->dtype));
2590 
2591 	error = 0;
2592 
2593 	switch (vs->dtype) {
2594 	case VSD_DTYPE_INT_S32:
2595 		if (VSD(int32, vsd)->s32 > voival->int32.s32) {
2596 			VSD(int32, vsd)->s32 = voival->int32.s32;
2597 			vs->flags |= VS_VSDVALID;
2598 		}
2599 		break;
2600 	case VSD_DTYPE_INT_U32:
2601 		if (VSD(int32, vsd)->u32 > voival->int32.u32) {
2602 			VSD(int32, vsd)->u32 = voival->int32.u32;
2603 			vs->flags |= VS_VSDVALID;
2604 		}
2605 		break;
2606 	case VSD_DTYPE_INT_S64:
2607 		if (VSD(int64, vsd)->s64 > voival->int64.s64) {
2608 			VSD(int64, vsd)->s64 = voival->int64.s64;
2609 			vs->flags |= VS_VSDVALID;
2610 		}
2611 		break;
2612 	case VSD_DTYPE_INT_U64:
2613 		if (VSD(int64, vsd)->u64 > voival->int64.u64) {
2614 			VSD(int64, vsd)->u64 = voival->int64.u64;
2615 			vs->flags |= VS_VSDVALID;
2616 		}
2617 		break;
2618 	case VSD_DTYPE_INT_SLONG:
2619 		if (VSD(intlong, vsd)->slong > voival->intlong.slong) {
2620 			VSD(intlong, vsd)->slong = voival->intlong.slong;
2621 			vs->flags |= VS_VSDVALID;
2622 		}
2623 		break;
2624 	case VSD_DTYPE_INT_ULONG:
2625 		if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) {
2626 			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
2627 			vs->flags |= VS_VSDVALID;
2628 		}
2629 		break;
2630 	case VSD_DTYPE_Q_S32:
2631 		if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
2632 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
2633 		    voival->q32.sq32)))) {
2634 			vs->flags |= VS_VSDVALID;
2635 		}
2636 		break;
2637 	case VSD_DTYPE_Q_U32:
2638 		if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
2639 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
2640 		    voival->q32.uq32)))) {
2641 			vs->flags |= VS_VSDVALID;
2642 		}
2643 		break;
2644 	case VSD_DTYPE_Q_S64:
2645 		if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
2646 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
2647 		    voival->q64.sq64)))) {
2648 			vs->flags |= VS_VSDVALID;
2649 		}
2650 		break;
2651 	case VSD_DTYPE_Q_U64:
2652 		if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
2653 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
2654 		    voival->q64.uq64)))) {
2655 			vs->flags |= VS_VSDVALID;
2656 		}
2657 		break;
2658 	default:
2659 		error = EINVAL;
2660 		break;
2661 	}
2662 
2663 	return (error);
2664 }
2665 
2666 static inline int
2667 stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused,
2668     struct voistatdata *voival, struct voistat *vs, void *vsd)
2669 {
2670 	int error;
2671 
2672 	KASSERT(vs->dtype < VSD_NUM_DTYPES,
2673 	    ("Unknown VSD dtype %d", vs->dtype));
2674 
2675 	error = 0;
2676 
2677 	switch (vs->dtype) {
2678 	case VSD_DTYPE_INT_S32:
2679 		VSD(int32, vsd)->s32 += voival->int32.s32;
2680 		break;
2681 	case VSD_DTYPE_INT_U32:
2682 		VSD(int32, vsd)->u32 += voival->int32.u32;
2683 		break;
2684 	case VSD_DTYPE_INT_S64:
2685 		VSD(int64, vsd)->s64 += voival->int64.s64;
2686 		break;
2687 	case VSD_DTYPE_INT_U64:
2688 		VSD(int64, vsd)->u64 += voival->int64.u64;
2689 		break;
2690 	case VSD_DTYPE_INT_SLONG:
2691 		VSD(intlong, vsd)->slong += voival->intlong.slong;
2692 		break;
2693 	case VSD_DTYPE_INT_ULONG:
2694 		VSD(intlong, vsd)->ulong += voival->intlong.ulong;
2695 		break;
2696 	case VSD_DTYPE_Q_S32:
2697 		error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32);
2698 		break;
2699 	case VSD_DTYPE_Q_U32:
2700 		error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32);
2701 		break;
2702 	case VSD_DTYPE_Q_S64:
2703 		error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64);
2704 		break;
2705 	case VSD_DTYPE_Q_U64:
2706 		error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64);
2707 		break;
2708 	default:
2709 		error = EINVAL;
2710 		break;
2711 	}
2712 
2713 	if (!error)
2714 		vs->flags |= VS_VSDVALID;
2715 
2716 	return (error);
2717 }
2718 
2719 static inline int
2720 stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival,
2721     struct voistat *vs, struct voistatdata_hist *hist)
2722 {
2723 	struct voistatdata_numeric *bkt_lb, *bkt_ub;
2724 	uint64_t *oob64, *cnt64;
2725 	uint32_t *oob32, *cnt32;
2726 	int error, i, found, is32bit, has_ub, eq_only;
2727 
2728 	error = 0;
2729 
2730 	switch (vs->dtype) {
2731 	case VSD_DTYPE_CRHIST32:
2732 		i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
2733 		is32bit = 1;
2734 		has_ub = eq_only = 0;
2735 		oob32 = &VSD(crhist32, hist)->oob;
2736 		break;
2737 	case VSD_DTYPE_DRHIST32:
2738 		i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
2739 		is32bit = has_ub = 1;
2740 		eq_only = 0;
2741 		oob32 = &VSD(drhist32, hist)->oob;
2742 		break;
2743 	case VSD_DTYPE_DVHIST32:
2744 		i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
2745 		is32bit = eq_only = 1;
2746 		has_ub = 0;
2747 		oob32 = &VSD(dvhist32, hist)->oob;
2748 		break;
2749 	case VSD_DTYPE_CRHIST64:
2750 		i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
2751 		is32bit = has_ub = eq_only = 0;
2752 		oob64 = &VSD(crhist64, hist)->oob;
2753 		break;
2754 	case VSD_DTYPE_DRHIST64:
2755 		i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
2756 		is32bit = eq_only = 0;
2757 		has_ub = 1;
2758 		oob64 = &VSD(drhist64, hist)->oob;
2759 		break;
2760 	case VSD_DTYPE_DVHIST64:
2761 		i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
2762 		is32bit = has_ub = 0;
2763 		eq_only = 1;
2764 		oob64 = &VSD(dvhist64, hist)->oob;
2765 		break;
2766 	default:
2767 		return (EINVAL);
2768 	}
2769 	i--; /* Adjust for 0-based array index. */
2770 
2771 	/* XXXLAS: Should probably use a better bucket search algorithm. ARB? */
2772 	for (found = 0; i >= 0 && !found; i--) {
2773 		switch (vs->dtype) {
2774 		case VSD_DTYPE_CRHIST32:
2775 			bkt_lb = &VSD(crhist32, hist)->bkts[i].lb;
2776 			cnt32 = &VSD(crhist32, hist)->bkts[i].cnt;
2777 			break;
2778 		case VSD_DTYPE_DRHIST32:
2779 			bkt_lb = &VSD(drhist32, hist)->bkts[i].lb;
2780 			bkt_ub = &VSD(drhist32, hist)->bkts[i].ub;
2781 			cnt32 = &VSD(drhist32, hist)->bkts[i].cnt;
2782 			break;
2783 		case VSD_DTYPE_DVHIST32:
2784 			bkt_lb = &VSD(dvhist32, hist)->bkts[i].val;
2785 			cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt;
2786 			break;
2787 		case VSD_DTYPE_CRHIST64:
2788 			bkt_lb = &VSD(crhist64, hist)->bkts[i].lb;
2789 			cnt64 = &VSD(crhist64, hist)->bkts[i].cnt;
2790 			break;
2791 		case VSD_DTYPE_DRHIST64:
2792 			bkt_lb = &VSD(drhist64, hist)->bkts[i].lb;
2793 			bkt_ub = &VSD(drhist64, hist)->bkts[i].ub;
2794 			cnt64 = &VSD(drhist64, hist)->bkts[i].cnt;
2795 			break;
2796 		case VSD_DTYPE_DVHIST64:
2797 			bkt_lb = &VSD(dvhist64, hist)->bkts[i].val;
2798 			cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt;
2799 			break;
2800 		default:
2801 			return (EINVAL);
2802 		}
2803 
2804 		switch (voi_dtype) {
2805 		case VSD_DTYPE_INT_S32:
2806 			if (voival->int32.s32 >= bkt_lb->int32.s32) {
2807 				if ((eq_only && voival->int32.s32 ==
2808 				    bkt_lb->int32.s32) ||
2809 				    (!eq_only && (!has_ub ||
2810 				    voival->int32.s32 < bkt_ub->int32.s32)))
2811 					found = 1;
2812 			}
2813 			break;
2814 		case VSD_DTYPE_INT_U32:
2815 			if (voival->int32.u32 >= bkt_lb->int32.u32) {
2816 				if ((eq_only && voival->int32.u32 ==
2817 				    bkt_lb->int32.u32) ||
2818 				    (!eq_only && (!has_ub ||
2819 				    voival->int32.u32 < bkt_ub->int32.u32)))
2820 					found = 1;
2821 			}
2822 			break;
2823 		case VSD_DTYPE_INT_S64:
2824 			if (voival->int64.s64 >= bkt_lb->int64.s64)
2825 				if ((eq_only && voival->int64.s64 ==
2826 				    bkt_lb->int64.s64) ||
2827 				    (!eq_only && (!has_ub ||
2828 				    voival->int64.s64 < bkt_ub->int64.s64)))
2829 					found = 1;
2830 			break;
2831 		case VSD_DTYPE_INT_U64:
2832 			if (voival->int64.u64 >= bkt_lb->int64.u64)
2833 				if ((eq_only && voival->int64.u64 ==
2834 				    bkt_lb->int64.u64) ||
2835 				    (!eq_only && (!has_ub ||
2836 				    voival->int64.u64 < bkt_ub->int64.u64)))
2837 					found = 1;
2838 			break;
2839 		case VSD_DTYPE_INT_SLONG:
2840 			if (voival->intlong.slong >= bkt_lb->intlong.slong)
2841 				if ((eq_only && voival->intlong.slong ==
2842 				    bkt_lb->intlong.slong) ||
2843 				    (!eq_only && (!has_ub ||
2844 				    voival->intlong.slong <
2845 				    bkt_ub->intlong.slong)))
2846 					found = 1;
2847 			break;
2848 		case VSD_DTYPE_INT_ULONG:
2849 			if (voival->intlong.ulong >= bkt_lb->intlong.ulong)
2850 				if ((eq_only && voival->intlong.ulong ==
2851 				    bkt_lb->intlong.ulong) ||
2852 				    (!eq_only && (!has_ub ||
2853 				    voival->intlong.ulong <
2854 				    bkt_ub->intlong.ulong)))
2855 					found = 1;
2856 			break;
2857 		case VSD_DTYPE_Q_S32:
2858 			if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32))
2859 				if ((eq_only && Q_QEQ(voival->q32.sq32,
2860 				    bkt_lb->q32.sq32)) ||
2861 				    (!eq_only && (!has_ub ||
2862 				    Q_QLTQ(voival->q32.sq32,
2863 				    bkt_ub->q32.sq32))))
2864 					found = 1;
2865 			break;
2866 		case VSD_DTYPE_Q_U32:
2867 			if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32))
2868 				if ((eq_only && Q_QEQ(voival->q32.uq32,
2869 				    bkt_lb->q32.uq32)) ||
2870 				    (!eq_only && (!has_ub ||
2871 				    Q_QLTQ(voival->q32.uq32,
2872 				    bkt_ub->q32.uq32))))
2873 					found = 1;
2874 			break;
2875 		case VSD_DTYPE_Q_S64:
2876 			if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64))
2877 				if ((eq_only && Q_QEQ(voival->q64.sq64,
2878 				    bkt_lb->q64.sq64)) ||
2879 				    (!eq_only && (!has_ub ||
2880 				    Q_QLTQ(voival->q64.sq64,
2881 				    bkt_ub->q64.sq64))))
2882 					found = 1;
2883 			break;
2884 		case VSD_DTYPE_Q_U64:
2885 			if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64))
2886 				if ((eq_only && Q_QEQ(voival->q64.uq64,
2887 				    bkt_lb->q64.uq64)) ||
2888 				    (!eq_only && (!has_ub ||
2889 				    Q_QLTQ(voival->q64.uq64,
2890 				    bkt_ub->q64.uq64))))
2891 					found = 1;
2892 			break;
2893 		default:
2894 			break;
2895 		}
2896 	}
2897 
2898 	if (found) {
2899 		if (is32bit)
2900 			*cnt32 += 1;
2901 		else
2902 			*cnt64 += 1;
2903 	} else {
2904 		if (is32bit)
2905 			*oob32 += 1;
2906 		else
2907 			*oob64 += 1;
2908 	}
2909 
2910 	vs->flags |= VS_VSDVALID;
2911 	return (error);
2912 }
2913 
/*
 * Compress a t-digest whose centroid ARB is full by rebuilding it: every
 * centroid is placed on the ARB free list in pseudorandom order, and each
 * one's mu/cnt is then re-inserted as a weighted sample via
 * stats_v1_vsd_tdgst_add().
 *
 * "vs_dtype" selects the 32 or 64 bit t-digest layout of "tdgst", and
 * "attempt" is passed through unchanged to stats_v1_vsd_tdgst_add().
 *
 * Returns 0 without doing anything if the ARB is not full, EINVAL if
 * "vs_dtype" is not a t-digest type, and otherwise 0 or the most recent error
 * encountered while re-inserting centroids (all re-insertions are attempted
 * regardless of errors).
 */
static inline int
stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype,
    struct voistatdata_tdgst *tdgst, int attempt)
{
	struct ctdth32 *ctd32tree;
	struct ctdth64 *ctd64tree;
	struct voistatdata_tdgstctd32 *ctd32;
	struct voistatdata_tdgstctd64 *ctd64;
	uint64_t ebits, idxmask;
	uint32_t bitsperidx, nebits;
	int error, idx, is32bit, maxctds, remctds, tmperr;

	error = 0;

	/*
	 * Bail out early unless the tree is full. Otherwise count the
	 * compression, reset the tree and the sample count (re-insertion
	 * below goes through stats_v1_vsd_tdgst_add()), and NULL out the
	 * pointers belonging to the unused width.
	 */
	switch (vs_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
		if (!ARB_FULL(ctd32tree))
			return (0);
		VSD(tdgstclust32, tdgst)->compcnt++;
		maxctds = remctds = ARB_MAXNODES(ctd32tree);
		ARB_RESET_TREE(ctd32tree, ctdth32, maxctds);
		VSD(tdgstclust32, tdgst)->smplcnt = 0;
		is32bit = 1;
		ctd64tree = NULL;
		ctd64 = NULL;
#ifdef DIAGNOSTIC
		RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
#endif
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
		if (!ARB_FULL(ctd64tree))
			return (0);
		VSD(tdgstclust64, tdgst)->compcnt++;
		maxctds = remctds = ARB_MAXNODES(ctd64tree);
		ARB_RESET_TREE(ctd64tree, ctdth64, maxctds);
		VSD(tdgstclust64, tdgst)->smplcnt = 0;
		is32bit = 0;
		ctd32tree = NULL;
		ctd32 = NULL;
#ifdef DIAGNOSTIC
		RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
#endif
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Rebuild the t-digest ARB by pseudorandomly selecting centroids and
	 * re-inserting the mu/cnt of each as a value and corresponding weight.
	 */

	/*
	 * XXXCEM: random(9) is currently rand(3), not random(3).  rand(3)
	 * RAND_MAX happens to be approximately 31 bits (range [0,
	 * 0x7ffffffd]), so the math kinda works out.  When/if this portion of
	 * the code is compiled in userspace, it gets the random(3) behavior,
	 * which has expected range [0, 0x7fffffff].
	 */
#define	bitsperrand 31
	/*
	 * "ebits" is a pool of not yet consumed random bits and "nebits" the
	 * number of bits currently in the pool. Each index drawn from the pool
	 * consumes "bitsperidx" bits, enough to represent maxctds.
	 */
	ebits = 0;
	nebits = 0;
	bitsperidx = fls(maxctds);
	KASSERT(bitsperidx <= sizeof(ebits) << 3,
	    ("%s: bitsperidx=%d, ebits=%d",
	    __func__, bitsperidx, (int)(sizeof(ebits) << 3)));
	idxmask = (UINT64_C(1) << bitsperidx) - 1;

	/* Initialise the free list with randomised centroid indices. */
	for (; remctds > 0; remctds--) {
		/* Top up the pool; it can never hold more than 64 bits. */
		while (nebits < bitsperidx) {
			ebits |= ((uint64_t)random()) << nebits;
			nebits += bitsperrand;
			if (nebits > (sizeof(ebits) << 3))
				nebits = sizeof(ebits) << 3;
		}
		/* Draw the next index from the low bits of the pool. */
		idx = ebits & idxmask;
		nebits -= bitsperidx;
		ebits >>= bitsperidx;

		/*
		 * Select the next centroid to put on the ARB free list. We
		 * start with the centroid at our randomly selected array index,
		 * and work our way forwards until finding one (the latter
		 * aspect reduces re-insertion randomness, but is good enough).
		 */
		do {
			/* Wrap around to the start of the centroid array. */
			if (idx >= maxctds)
				idx %= maxctds;

			if (is32bit)
				ctd32 = ARB_NODE(ctd32tree, idx);
			else
				ctd64 = ARB_NODE(ctd64tree, idx);
			/*
			 * Keep going while the candidate is already on the
			 * free list; the "++idx" both advances to the next
			 * slot and is always non-zero here.
			 */
		} while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) :
		    ARB_ISFREE(ctd64, ctdlnk)) && ++idx);

		/* Put the centroid on the ARB free list. */
		if (is32bit)
			ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk);
		else
			ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk);
	}

	/*
	 * The free list now contains the randomised indices of every centroid.
	 * Walk the free list from start to end, re-inserting each centroid's
	 * mu/cnt. The tdgst_add() call may or may not consume the free centroid
	 * we re-insert values from during each loop iteration, so we must latch
	 * the index of the next free list centroid before the re-insertion
	 * call. The previous loop above should have left the centroid pointer
	 * pointing to the element at the head of the free list.
	 */
	KASSERT((is32bit ?
	    ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) :
	    ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)),
	    ("%s: t-digest ARB@%p free list bug", __func__,
	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
	remctds = maxctds;
	while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) {
		tmperr = 0;
		if (is32bit) {
			s64q_t x;

			idx = ARB_NEXTFREEIDX(ctd32, ctdlnk);
			/* Cloning a s32q_t into a s64q_t should never fail. */
			tmperr = Q_QCLONEQ(&x, ctd32->mu);
			/* Only attempt the re-insertion if the clone worked. */
			tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add(
			    vs_dtype, tdgst, x, ctd32->cnt, attempt);
			ctd32 = ARB_NODE(ctd32tree, idx);
			KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk),
			    ("%s: t-digest ARB@%p free list bug", __func__,
			    ctd32tree));
		} else {
			idx = ARB_NEXTFREEIDX(ctd64, ctdlnk);
			tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst,
			    ctd64->mu, ctd64->cnt, attempt);
			ctd64 = ARB_NODE(ctd64tree, idx);
			KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk),
			    ("%s: t-digest ARB@%p free list bug", __func__,
			    ctd64tree));
		}
		/*
		 * This process should not produce errors, bugs notwithstanding.
		 * Just in case, latch any errors and attempt all re-insertions.
		 */
		error = tmperr ? tmperr : error;
		remctds--;
	}

	/* Every centroid must have been visited exactly once. */
	KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__,
	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));

	return (error);
}
3071 
3072 static inline int
3073 stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
3074     s64q_t x, uint64_t weight, int attempt)
3075 {
3076 #ifdef DIAGNOSTIC
3077 	char qstr[Q_MAXSTRLEN(x, 10)];
3078 #endif
3079 	struct ctdth32 *ctd32tree;
3080 	struct ctdth64 *ctd64tree;
3081 	void *closest, *cur, *lb, *ub;
3082 	struct voistatdata_tdgstctd32 *ctd32;
3083 	struct voistatdata_tdgstctd64 *ctd64;
3084 	uint64_t cnt, smplcnt, sum, tmpsum;
3085 	s64q_t k, minz, q, z;
3086 	int error, is32bit, n;
3087 
3088 	error = 0;
3089 	minz = Q_INI(&z, 0, 0, Q_NFBITS(x));
3090 
3091 	switch (vs_dtype) {
3092 	case VSD_DTYPE_TDGSTCLUST32:
3093 		if ((UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt)
3094 			error = EOVERFLOW;
3095 		smplcnt = VSD(tdgstclust32, tdgst)->smplcnt;
3096 		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
3097 		is32bit = 1;
3098 		ctd64tree = NULL;
3099 		ctd64 = NULL;
3100 		break;
3101 	case VSD_DTYPE_TDGSTCLUST64:
3102 		if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt)
3103 			error = EOVERFLOW;
3104 		smplcnt = VSD(tdgstclust64, tdgst)->smplcnt;
3105 		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
3106 		is32bit = 0;
3107 		ctd32tree = NULL;
3108 		ctd32 = NULL;
3109 		break;
3110 	default:
3111 		error = EINVAL;
3112 		break;
3113 	}
3114 
3115 	if (error)
3116 		return (error);
3117 
3118 	/*
3119 	 * Inspired by Ted Dunning's AVLTreeDigest.java
3120 	 */
3121 	do {
3122 #if defined(DIAGNOSTIC)
3123 		KASSERT(attempt < 5,
3124 		    ("%s: Too many attempts", __func__));
3125 #endif
3126 		if (attempt >= 5)
3127 			return (EAGAIN);
3128 
3129 		Q_SIFVAL(minz, Q_IFMAXVAL(minz));
3130 		closest = ub = NULL;
3131 		sum = tmpsum = 0;
3132 
3133 		if (is32bit)
3134 			lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree));
3135 		else
3136 			lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree));
3137 
3138 		if (lb == NULL) /* Empty tree. */
3139 			lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) :
3140 			    (void *)ARB_ROOT(ctd64tree));
3141 
3142 		/*
3143 		 * Find the set of centroids with minimum distance to x and
3144 		 * compute the sum of counts for all centroids with mean less
3145 		 * than the first centroid in the set.
3146 		 */
3147 		for (; cur != NULL;
3148 		    cur = (is32bit ?
3149 		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3150 		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3151 			if (is32bit) {
3152 				cnt = ctd32->cnt;
3153 				KASSERT(Q_PRECEQ(ctd32->mu, x),
3154 				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
3155 				    Q_RELPREC(ctd32->mu, x)));
3156 				/* Ok to assign as both have same precision. */
3157 				z = ctd32->mu;
3158 			} else {
3159 				cnt = ctd64->cnt;
3160 				KASSERT(Q_PRECEQ(ctd64->mu, x),
3161 				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
3162 				    Q_RELPREC(ctd64->mu, x)));
3163 				/* Ok to assign as both have same precision. */
3164 				z = ctd64->mu;
3165 			}
3166 
3167 			error = Q_QSUBQ(&z, x);
3168 #if defined(DIAGNOSTIC)
3169 			KASSERT(!error, ("%s: unexpected error %d", __func__,
3170 			    error));
3171 #endif
3172 			if (error)
3173 				return (error);
3174 
3175 			z = Q_QABS(z);
3176 			if (Q_QLTQ(z, minz)) {
3177 				minz = z;
3178 				lb = cur;
3179 				sum = tmpsum;
3180 				tmpsum += cnt;
3181 			} else if (Q_QGTQ(z, minz)) {
3182 				ub = cur;
3183 				break;
3184 			}
3185 		}
3186 
3187 		cur = (is32bit ?
3188 		    (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) :
3189 		    (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb));
3190 
3191 		for (n = 0; cur != ub; cur = (is32bit ?
3192 		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3193 		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3194 			if (is32bit)
3195 				cnt = ctd32->cnt;
3196 			else
3197 				cnt = ctd64->cnt;
3198 
3199 			q = Q_CTRLINI(16);
3200 			if (smplcnt == 1)
3201 				error = Q_QFRACI(&q, 1, 2);
3202 			else
3203 				/* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */
3204 				error = Q_QFRACI(&q, (sum << 1) + cnt - 1,
3205 				    (smplcnt - 1) << 1);
3206 			k = q;
3207 			/* k = q x 4 x samplcnt x attempt */
3208 			error |= Q_QMULI(&k, 4 * smplcnt * attempt);
3209 			/* k = k x (1 - q) */
3210 			error |= Q_QSUBI(&q, 1);
3211 			q = Q_QABS(q);
3212 			error |= Q_QMULQ(&k, q);
3213 #if defined(DIAGNOSTIC)
3214 #if !defined(_KERNEL)
3215 			double q_dbl, k_dbl, q2d, k2d;
3216 			q2d = Q_Q2D(q);
3217 			k2d = Q_Q2D(k);
3218 			q_dbl = smplcnt == 1 ? 0.5 :
3219 			    (sum + ((cnt - 1)  / 2.0)) / (double)(smplcnt - 1);
3220 			k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt;
3221 			/*
3222 			 * If the difference between q and q_dbl is greater than
3223 			 * the fractional precision of q, something is off.
3224 			 * NB: q is holding the value of 1 - q
3225 			 */
3226 			q_dbl = 1.0 - q_dbl;
3227 			KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) <
3228 			    (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))),
3229 			    ("Q-type q bad precision"));
3230 			KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) <
3231 			    1.0 + (0.01 * smplcnt),
3232 			    ("Q-type k bad precision"));
3233 #endif /* !_KERNEL */
3234 			KASSERT(!error, ("%s: unexpected error %d", __func__,
3235 			    error));
3236 #endif /* DIAGNOSTIC */
3237 			if (error)
3238 				return (error);
3239 			if ((is32bit && ((ctd32->cnt + weight) <=
3240 			    (uint64_t)Q_GIVAL(k))) ||
3241 			    (!is32bit && ((ctd64->cnt + weight) <=
3242 			    (uint64_t)Q_GIVAL(k)))) {
3243 				n++;
3244 				/* random() produces 31 bits. */
3245 				if (random() < (INT32_MAX / n))
3246 					closest = cur;
3247 			}
3248 			sum += cnt;
3249 		}
3250 	} while (closest == NULL &&
3251 	    (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) &&
3252 	    (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst,
3253 	    attempt++)) == 0);
3254 
3255 	if (error)
3256 		return (error);
3257 
3258 	if (closest != NULL) {
3259 		/* Merge with an existing centroid. */
3260 		if (is32bit) {
3261 			ctd32 = (struct voistatdata_tdgstctd32 *)closest;
3262 			error = Q_QSUBQ(&x, ctd32->mu);
3263 			/*
3264 			 * The following calculation "x / (cnt + weight)"
3265 			 * computes the amount by which to adjust the centroid's
3266 			 * mu value in order to merge in the VOI sample.
3267 			 *
3268 			 * It can underflow (Q_QDIVI() returns ERANGE) when the
3269 			 * user centroids' fractional precision (which is
3270 			 * inherited by 'x') is too low to represent the result.
3271 			 *
3272 			 * A sophisticated approach to dealing with this issue
3273 			 * would minimise accumulation of error by tracking
3274 			 * underflow per centroid and making an adjustment when
3275 			 * a LSB's worth of underflow has accumulated.
3276 			 *
3277 			 * A simpler approach is to let the result underflow
3278 			 * i.e. merge the VOI sample into the centroid without
3279 			 * adjusting the centroid's mu, and rely on the user to
3280 			 * specify their t-digest with sufficient centroid
3281 			 * fractional precision such that the accumulation of
3282 			 * error from multiple underflows is of no material
3283 			 * consequence to the centroid's final value of mu.
3284 			 *
3285 			 * For the moment, the latter approach is employed by
3286 			 * simply ignoring ERANGE here.
3287 			 *
3288 			 * XXXLAS: Per-centroid underflow tracking is likely too
3289 			 * onerous, but it probably makes sense to accumulate a
3290 			 * single underflow error variable across all centroids
3291 			 * and report it as part of the digest to provide
3292 			 * additional visibility into the digest's fidelity.
3293 			 */
3294 			error = error ? error :
3295 			    Q_QDIVI(&x, ctd32->cnt + weight);
3296 			if ((error && error != ERANGE)
3297 			    || (error = Q_QADDQ(&ctd32->mu, x))) {
3298 #ifdef DIAGNOSTIC
3299 				KASSERT(!error, ("%s: unexpected error %d",
3300 				    __func__, error));
3301 #endif
3302 				return (error);
3303 			}
3304 			ctd32->cnt += weight;
3305 			error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) ==
3306 			    NULL ? 0 : EALREADY;
3307 #ifdef DIAGNOSTIC
3308 			RB_REINSERT(rbctdth32,
3309 			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3310 #endif
3311 		} else {
3312 			ctd64 = (struct voistatdata_tdgstctd64 *)closest;
3313 			error = Q_QSUBQ(&x, ctd64->mu);
3314 			error = error ? error :
3315 			    Q_QDIVI(&x, ctd64->cnt + weight);
3316 			/* Refer to is32bit ERANGE discussion above. */
3317 			if ((error && error != ERANGE)
3318 			    || (error = Q_QADDQ(&ctd64->mu, x))) {
3319 				KASSERT(!error, ("%s: unexpected error %d",
3320 				    __func__, error));
3321 				return (error);
3322 			}
3323 			ctd64->cnt += weight;
3324 			error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) ==
3325 			    NULL ? 0 : EALREADY;
3326 #ifdef DIAGNOSTIC
3327 			RB_REINSERT(rbctdth64,
3328 			    &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64);
3329 #endif
3330 		}
3331 	} else {
3332 		/*
3333 		 * Add a new centroid. If digest compression is working
3334 		 * correctly, there should always be at least one free.
3335 		 */
3336 		if (is32bit) {
3337 			ctd32 = ARB_GETFREE(ctd32tree, ctdlnk);
3338 #ifdef DIAGNOSTIC
3339 			KASSERT(ctd32 != NULL,
3340 			    ("%s: t-digest@%p has no free centroids",
3341 			    __func__, tdgst));
3342 #endif
3343 			if (ctd32 == NULL)
3344 				return (EAGAIN);
3345 			if ((error = Q_QCPYVALQ(&ctd32->mu, x)))
3346 				return (error);
3347 			ctd32->cnt = weight;
3348 			error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ?
3349 			    0 : EALREADY;
3350 #ifdef DIAGNOSTIC
3351 			RB_INSERT(rbctdth32,
3352 			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3353 #endif
3354 		} else {
3355 			ctd64 = ARB_GETFREE(ctd64tree, ctdlnk);
3356 #ifdef DIAGNOSTIC
3357 			KASSERT(ctd64 != NULL,
3358 			    ("%s: t-digest@%p has no free centroids",
3359 			    __func__, tdgst));
3360 #endif
3361 			if (ctd64 == NULL) /* Should not happen. */
3362 				return (EAGAIN);
3363 			/* Direct assignment ok as both have same type/prec. */
3364 			ctd64->mu = x;
3365 			ctd64->cnt = weight;
3366 			error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ?
3367 			    0 : EALREADY;
3368 #ifdef DIAGNOSTIC
3369 			RB_INSERT(rbctdth64, &VSD(tdgstclust64,
3370 			    tdgst)->rbctdtree, ctd64);
3371 #endif
3372 		}
3373 	}
3374 
3375 	if (is32bit)
3376 		VSD(tdgstclust32, tdgst)->smplcnt += weight;
3377 	else {
3378 		VSD(tdgstclust64, tdgst)->smplcnt += weight;
3379 
3380 #ifdef DIAGNOSTIC
3381 		struct rbctdth64 *rbctdtree =
3382 		    &VSD(tdgstclust64, tdgst)->rbctdtree;
3383 		struct voistatdata_tdgstctd64 *rbctd64;
3384 		int i = 0;
3385 		ARB_FOREACH(ctd64, ctdth64, ctd64tree) {
3386 			rbctd64 = (i == 0 ? RB_MIN(rbctdth64, rbctdtree) :
3387 			    RB_NEXT(rbctdth64, rbctdtree, rbctd64));
3388 
3389 			if (i >= ARB_CURNODES(ctd64tree)
3390 			    || ctd64 != rbctd64
3391 			    || ARB_MIN(ctdth64, ctd64tree) !=
3392 			       RB_MIN(rbctdth64, rbctdtree)
3393 			    || ARB_MAX(ctdth64, ctd64tree) !=
3394 			       RB_MAX(rbctdth64, rbctdtree)
3395 			    || ARB_LEFTIDX(ctd64, ctdlnk) !=
3396 			       ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk))
3397 			    || ARB_RIGHTIDX(ctd64, ctdlnk) !=
3398 			       ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk))
3399 			    || ARB_PARENTIDX(ctd64, ctdlnk) !=
3400 			       ARB_SELFIDX(ctd64tree,
3401 			       RB_PARENT(rbctd64, rblnk))) {
3402 				Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr));
3403 				printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3404 				    "mu=%s\n",
3405 				    (int)ARB_SELFIDX(ctd64tree, ctd64),
3406 				    ARB_PARENTIDX(ctd64, ctdlnk),
3407 				    ARB_LEFTIDX(ctd64, ctdlnk),
3408 				    ARB_RIGHTIDX(ctd64, ctdlnk),
3409 				    ARB_COLOR(ctd64, ctdlnk),
3410 				    qstr);
3411 
3412 				Q_TOSTR(rbctd64->mu, -1, 10, qstr,
3413 				    sizeof(qstr));
3414 				struct voistatdata_tdgstctd64 *parent;
3415 				parent = RB_PARENT(rbctd64, rblnk);
3416 				int rb_color =
3417 					parent == NULL ? 0 :
3418 					RB_LEFT(parent, rblnk) == rbctd64 ?
3419 					(_RB_BITSUP(parent, rblnk) & _RB_L) != 0 :
3420  					(_RB_BITSUP(parent, rblnk) & _RB_R) != 0;
3421 				printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3422 				    "mu=%s\n",
3423 				    (int)ARB_SELFIDX(ctd64tree, rbctd64),
3424 				    (int)ARB_SELFIDX(ctd64tree,
3425 				      RB_PARENT(rbctd64, rblnk)),
3426 				    (int)ARB_SELFIDX(ctd64tree,
3427 				      RB_LEFT(rbctd64, rblnk)),
3428 				    (int)ARB_SELFIDX(ctd64tree,
3429 				      RB_RIGHT(rbctd64, rblnk)),
3430 				    rb_color,
3431 				    qstr);
3432 
3433 				panic("RB@%p and ARB@%p trees differ\n",
3434 				    rbctdtree, ctd64tree);
3435 			}
3436 			i++;
3437 		}
3438 #endif /* DIAGNOSTIC */
3439 	}
3440 
3441 	return (error);
3442 }
3443 
3444 static inline int
3445 stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival,
3446     struct voistat *vs, struct voistatdata_tdgst *tdgst)
3447 {
3448 	s64q_t x;
3449 	int error;
3450 
3451 	error = 0;
3452 
3453 	switch (vs->dtype) {
3454 	case VSD_DTYPE_TDGSTCLUST32:
3455 		/* Use same precision as the user's centroids. */
3456 		Q_INI(&x, 0, 0, Q_NFBITS(
3457 		    ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu));
3458 		break;
3459 	case VSD_DTYPE_TDGSTCLUST64:
3460 		/* Use same precision as the user's centroids. */
3461 		Q_INI(&x, 0, 0, Q_NFBITS(
3462 		    ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu));
3463 		break;
3464 	default:
3465 		KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 ||
3466 		    vs->dtype == VSD_DTYPE_TDGSTCLUST64,
3467 		    ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>",
3468 		    __func__, vs->dtype));
3469 		return (EINVAL);
3470 	}
3471 
3472 	/*
3473 	 * XXXLAS: Should have both a signed and unsigned 'x' variable to avoid
3474 	 * returning EOVERFLOW if the voival would have fit in a u64q_t.
3475 	 */
3476 	switch (voi_dtype) {
3477 	case VSD_DTYPE_INT_S32:
3478 		error = Q_QCPYVALI(&x, voival->int32.s32);
3479 		break;
3480 	case VSD_DTYPE_INT_U32:
3481 		error = Q_QCPYVALI(&x, voival->int32.u32);
3482 		break;
3483 	case VSD_DTYPE_INT_S64:
3484 		error = Q_QCPYVALI(&x, voival->int64.s64);
3485 		break;
3486 	case VSD_DTYPE_INT_U64:
3487 		error = Q_QCPYVALI(&x, voival->int64.u64);
3488 		break;
3489 	case VSD_DTYPE_INT_SLONG:
3490 		error = Q_QCPYVALI(&x, voival->intlong.slong);
3491 		break;
3492 	case VSD_DTYPE_INT_ULONG:
3493 		error = Q_QCPYVALI(&x, voival->intlong.ulong);
3494 		break;
3495 	case VSD_DTYPE_Q_S32:
3496 		error = Q_QCPYVALQ(&x, voival->q32.sq32);
3497 		break;
3498 	case VSD_DTYPE_Q_U32:
3499 		error = Q_QCPYVALQ(&x, voival->q32.uq32);
3500 		break;
3501 	case VSD_DTYPE_Q_S64:
3502 		error = Q_QCPYVALQ(&x, voival->q64.sq64);
3503 		break;
3504 	case VSD_DTYPE_Q_U64:
3505 		error = Q_QCPYVALQ(&x, voival->q64.uq64);
3506 		break;
3507 	default:
3508 		error = EINVAL;
3509 		break;
3510 	}
3511 
3512 	if (error ||
3513 	    (error = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, x, 1, 1)))
3514 		return (error);
3515 
3516 	vs->flags |= VS_VSDVALID;
3517 	return (0);
3518 }
3519 
3520 int
3521 stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
3522     enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags)
3523 {
3524 	struct voi *v;
3525 	struct voistat *vs;
3526 	void *statevsd, *vsd;
3527 	int error, i, tmperr;
3528 
3529 	error = 0;
3530 
3531 	if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id >= NVOIS(sb) ||
3532 	    voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES || voival == NULL)
3533 		return (EINVAL);
3534 	v = &sb->vois[voi_id];
3535 	if (voi_dtype != v->dtype || v->id < 0 ||
3536 	    ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE)))
3537 		return (EINVAL);
3538 
3539 	vs = BLOB_OFFSET(sb, v->stats_off);
3540 	if (v->flags & VOI_REQSTATE)
3541 		statevsd = BLOB_OFFSET(sb, vs->data_off);
3542 	else
3543 		statevsd = NULL;
3544 
3545 	if (flags & SB_VOI_RELUPDATE) {
3546 		switch (voi_dtype) {
3547 		case VSD_DTYPE_INT_S32:
3548 			voival->int32.s32 +=
3549 			    VSD(voistate, statevsd)->prev.int32.s32;
3550 			break;
3551 		case VSD_DTYPE_INT_U32:
3552 			voival->int32.u32 +=
3553 			    VSD(voistate, statevsd)->prev.int32.u32;
3554 			break;
3555 		case VSD_DTYPE_INT_S64:
3556 			voival->int64.s64 +=
3557 			    VSD(voistate, statevsd)->prev.int64.s64;
3558 			break;
3559 		case VSD_DTYPE_INT_U64:
3560 			voival->int64.u64 +=
3561 			    VSD(voistate, statevsd)->prev.int64.u64;
3562 			break;
3563 		case VSD_DTYPE_INT_SLONG:
3564 			voival->intlong.slong +=
3565 			    VSD(voistate, statevsd)->prev.intlong.slong;
3566 			break;
3567 		case VSD_DTYPE_INT_ULONG:
3568 			voival->intlong.ulong +=
3569 			    VSD(voistate, statevsd)->prev.intlong.ulong;
3570 			break;
3571 		case VSD_DTYPE_Q_S32:
3572 			error = Q_QADDQ(&voival->q32.sq32,
3573 			    VSD(voistate, statevsd)->prev.q32.sq32);
3574 			break;
3575 		case VSD_DTYPE_Q_U32:
3576 			error = Q_QADDQ(&voival->q32.uq32,
3577 			    VSD(voistate, statevsd)->prev.q32.uq32);
3578 			break;
3579 		case VSD_DTYPE_Q_S64:
3580 			error = Q_QADDQ(&voival->q64.sq64,
3581 			    VSD(voistate, statevsd)->prev.q64.sq64);
3582 			break;
3583 		case VSD_DTYPE_Q_U64:
3584 			error = Q_QADDQ(&voival->q64.uq64,
3585 			    VSD(voistate, statevsd)->prev.q64.uq64);
3586 			break;
3587 		default:
3588 			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3589 			break;
3590 		}
3591 	}
3592 
3593 	if (error)
3594 		return (error);
3595 
3596 	for (i = v->voistatmaxid; i > 0; i--) {
3597 		vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i];
3598 		if (vs->stype < 0)
3599 			continue;
3600 
3601 		vsd = BLOB_OFFSET(sb, vs->data_off);
3602 
3603 		switch (vs->stype) {
3604 		case VS_STYPE_MAX:
3605 			tmperr = stats_v1_voi_update_max(voi_dtype, voival,
3606 			    vs, vsd);
3607 			break;
3608 		case VS_STYPE_MIN:
3609 			tmperr = stats_v1_voi_update_min(voi_dtype, voival,
3610 			    vs, vsd);
3611 			break;
3612 		case VS_STYPE_SUM:
3613 			tmperr = stats_v1_voi_update_sum(voi_dtype, voival,
3614 			    vs, vsd);
3615 			break;
3616 		case VS_STYPE_HIST:
3617 			tmperr = stats_v1_voi_update_hist(voi_dtype, voival,
3618 			    vs, vsd);
3619 			break;
3620 		case VS_STYPE_TDGST:
3621 			tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival,
3622 			    vs, vsd);
3623 			break;
3624 		default:
3625 			KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
3626 			break;
3627 		}
3628 
3629 		if (tmperr) {
3630 			error = tmperr;
3631 			VS_INCERRS(vs);
3632 		}
3633 	}
3634 
3635 	if (statevsd) {
3636 		switch (voi_dtype) {
3637 		case VSD_DTYPE_INT_S32:
3638 			VSD(voistate, statevsd)->prev.int32.s32 =
3639 			    voival->int32.s32;
3640 			break;
3641 		case VSD_DTYPE_INT_U32:
3642 			VSD(voistate, statevsd)->prev.int32.u32 =
3643 			    voival->int32.u32;
3644 			break;
3645 		case VSD_DTYPE_INT_S64:
3646 			VSD(voistate, statevsd)->prev.int64.s64 =
3647 			    voival->int64.s64;
3648 			break;
3649 		case VSD_DTYPE_INT_U64:
3650 			VSD(voistate, statevsd)->prev.int64.u64 =
3651 			    voival->int64.u64;
3652 			break;
3653 		case VSD_DTYPE_INT_SLONG:
3654 			VSD(voistate, statevsd)->prev.intlong.slong =
3655 			    voival->intlong.slong;
3656 			break;
3657 		case VSD_DTYPE_INT_ULONG:
3658 			VSD(voistate, statevsd)->prev.intlong.ulong =
3659 			    voival->intlong.ulong;
3660 			break;
3661 		case VSD_DTYPE_Q_S32:
3662 			error = Q_QCPYVALQ(
3663 			    &VSD(voistate, statevsd)->prev.q32.sq32,
3664 			    voival->q32.sq32);
3665 			break;
3666 		case VSD_DTYPE_Q_U32:
3667 			error = Q_QCPYVALQ(
3668 			    &VSD(voistate, statevsd)->prev.q32.uq32,
3669 			    voival->q32.uq32);
3670 			break;
3671 		case VSD_DTYPE_Q_S64:
3672 			error = Q_QCPYVALQ(
3673 			    &VSD(voistate, statevsd)->prev.q64.sq64,
3674 			    voival->q64.sq64);
3675 			break;
3676 		case VSD_DTYPE_Q_U64:
3677 			error = Q_QCPYVALQ(
3678 			    &VSD(voistate, statevsd)->prev.q64.uq64,
3679 			    voival->q64.uq64);
3680 			break;
3681 		default:
3682 			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3683 			break;
3684 		}
3685 	}
3686 
3687 	return (error);
3688 }
3689 
3690 #ifdef _KERNEL
3691 
/*
 * SYSINIT hook for stats(9). It currently performs no work; 'arg' is unused.
 */
static void
stats_init(void *arg)
{

}
/* Run stats_init() at SI_SUB_KDTRACE/SI_ORDER_FIRST with a NULL argument. */
SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL);
3698 
3699 /*
3700  * Sysctl handler to display the list of available stats templates.
3701  */
3702 static int
3703 stats_tpl_list_available(SYSCTL_HANDLER_ARGS)
3704 {
3705 	struct sbuf *s;
3706 	int err, i;
3707 
3708 	err = 0;
3709 
3710 	/* We can tolerate ntpl being stale, so do not take the lock. */
3711 	s = sbuf_new(NULL, NULL, /* +1 per tpl for , */
3712 	    ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN);
3713 	if (s == NULL)
3714 		return (ENOMEM);
3715 
3716 	TPL_LIST_RLOCK();
3717 	for (i = 0; i < ntpl; i++) {
3718 		err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "",
3719 		    tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash);
3720 		if (err) {
3721 			/* Sbuf overflow condition. */
3722 			err = EOVERFLOW;
3723 			break;
3724 		}
3725 	}
3726 	TPL_LIST_RUNLOCK();
3727 
3728 	if (!err) {
3729 		sbuf_finish(s);
3730 		err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
3731 	}
3732 
3733 	sbuf_delete(s);
3734 	return (err);
3735 }
3736 
3737 /*
3738  * Called by subsystem-specific sysctls to report and/or parse the list of
3739  * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t
3740  * conformant function pointer must be passed in as arg1, which is used to
3741  * interact with the subsystem's stats template sample rates list. If arg2 > 0,
3742  * a zero-initialised allocation of arg2-sized contextual memory is
3743  * heap-allocated and passed in to all subsystem callbacks made during the
3744  * operation of stats_tpl_sample_rates().
3745  *
3746  * XXXLAS: Assumes templates are never removed, which is currently true but may
3747  * need to be reworked in future if dynamic template management becomes a
3748  * requirement e.g. to support kernel module based templates.
3749  */
3750 int
3751 stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS)
3752 {
3753 	char kvpair_fmt[16], tplspec_fmt[16];
3754 	char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN];
3755 	char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */
3756 	stats_tpl_sr_cb_t subsys_cb;
3757 	void *subsys_ctx;
3758 	char *buf, *new_rates_usr_str, *tpl_name_p;
3759 	struct stats_tpl_sample_rate *rates;
3760 	struct sbuf *s, _s;
3761 	uint32_t cum_pct, pct, tpl_hash;
3762 	int err, i, off, len, newlen, nrates;
3763 
3764 	buf = NULL;
3765 	rates = NULL;
3766 	err = nrates = 0;
3767 	subsys_cb = (stats_tpl_sr_cb_t)arg1;
3768 	KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__));
3769 	if (arg2 > 0)
3770 		subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO);
3771 	else
3772 		subsys_ctx = NULL;
3773 
3774 	/* Grab current count of subsystem rates. */
3775 	err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx);
3776 	if (err)
3777 		goto done;
3778 
3779 	/* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */
3780 	len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5));
3781 
3782 	if (req->oldptr != NULL || req->newptr != NULL)
3783 		buf = malloc(len, M_TEMP, M_WAITOK);
3784 
3785 	if (req->oldptr != NULL) {
3786 		if (nrates == 0) {
3787 			/* No rates, so return an empty string via oldptr. */
3788 			err = SYSCTL_OUT(req, "", 1);
3789 			if (err)
3790 				goto done;
3791 			goto process_new;
3792 		}
3793 
3794 		s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3795 
3796 		/* Grab locked count of, and ptr to, subsystem rates. */
3797 		err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates,
3798 		    subsys_ctx);
3799 		if (err)
3800 			goto done;
3801 		TPL_LIST_RLOCK();
3802 		for (i = 0; i < nrates && !err; i++) {
3803 			err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "",
3804 			    tpllist[rates[i].tpl_slot_id]->mb->tplname,
3805 			    tpllist[rates[i].tpl_slot_id]->mb->tplhash,
3806 			    rates[i].tpl_sample_pct);
3807 		}
3808 		TPL_LIST_RUNLOCK();
3809 		/* Tell subsystem that we're done with its rates list. */
3810 		err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx);
3811 		if (err)
3812 			goto done;
3813 
3814 		err = sbuf_finish(s);
3815 		if (err)
3816 			goto done; /* We lost a race for buf to be too small. */
3817 
3818 		/* Return the rendered string data via oldptr. */
3819 		err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s));
3820 	} else {
3821 		/* Return the upper bound size for buffer sizing requests. */
3822 		err = SYSCTL_OUT(req, NULL, len);
3823 	}
3824 
3825 process_new:
3826 	if (err || req->newptr == NULL)
3827 		goto done;
3828 
3829 	newlen = req->newlen - req->newidx;
3830 	err = SYSCTL_IN(req, buf, newlen);
3831 	if (err)
3832 		goto done;
3833 
3834 	/*
3835 	 * Initialise format strings at run time.
3836 	 *
3837 	 * Write the max template spec string length into the
3838 	 * template_spec=percent key-value pair parsing format string as:
3839 	 *     " %<width>[^=]=%u %n"
3840 	 *
3841 	 * Write the max template name string length into the tplname:tplhash
3842 	 * parsing format string as:
3843 	 *     "%<width>[^:]:%u"
3844 	 *
3845 	 * Subtract 1 for \0 appended by sscanf().
3846 	 */
3847 	sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1);
3848 	sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1);
3849 
3850 	/*
3851 	 * Parse each CSV key-value pair specifying a template and its sample
3852 	 * percentage. Whitespace either side of a key-value pair is ignored.
3853 	 * Templates can be specified by name, hash, or name and hash per the
3854 	 * following formats (chars in [] are optional):
3855 	 *    ["]<tplname>["]=<percent>
3856 	 *    :hash=pct
3857 	 *    ["]<tplname>["]:hash=<percent>
3858 	 */
3859 	cum_pct = nrates = 0;
3860 	rates = NULL;
3861 	buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */
3862 	new_rates_usr_str = buf;
3863 	while (isspace(*new_rates_usr_str))
3864 		new_rates_usr_str++; /* Skip leading whitespace. */
3865 	while (*new_rates_usr_str != '\0') {
3866 		tpl_name_p = tpl_name;
3867 		tpl_name[0] = '\0';
3868 		tpl_hash = 0;
3869 		off = 0;
3870 
3871 		/*
3872 		 * Parse key-value pair which must perform 2 conversions, then
3873 		 * parse the template spec to extract either name, hash, or name
3874 		 * and hash depending on the three possible spec formats. The
3875 		 * tplspec_fmt format specifier parses name or name and hash
3876 		 * template specs, while the ":%u" format specifier parses
3877 		 * hash-only template specs. If parsing is successfull, ensure
3878 		 * the cumulative sampling percentage does not exceed 100.
3879 		 */
3880 		err = EINVAL;
3881 		if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct,
3882 		    &off))
3883 			break;
3884 		if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) &&
3885 		    (1 != sscanf(tpl_spec, ":%u", &tpl_hash)))
3886 			break;
3887 		if ((cum_pct += pct) > 100)
3888 			break;
3889 		err = 0;
3890 
3891 		/* Strip surrounding "" from template name if present. */
3892 		len = strlen(tpl_name);
3893 		if (len > 0) {
3894 			if (tpl_name[len - 1] == '"')
3895 				tpl_name[--len] = '\0';
3896 			if (tpl_name[0] == '"') {
3897 				tpl_name_p++;
3898 				len--;
3899 			}
3900 		}
3901 
3902 		rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. */
3903 		    (nrates + 1) * sizeof(*rates), M_WAITOK);
3904 		rates[nrates].tpl_slot_id =
3905 		    stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash);
3906 		if (rates[nrates].tpl_slot_id < 0) {
3907 			err = -rates[nrates].tpl_slot_id;
3908 			break;
3909 		}
3910 		rates[nrates].tpl_sample_pct = pct;
3911 		nrates++;
3912 		new_rates_usr_str += off;
3913 		if (*new_rates_usr_str != ',')
3914 			break; /* End-of-input or malformed. */
3915 		new_rates_usr_str++; /* Move past comma to next pair. */
3916 	}
3917 
3918 	if (!err) {
3919 		if ((new_rates_usr_str - buf) < newlen) {
3920 			/* Entire input has not been consumed. */
3921 			err = EINVAL;
3922 		} else {
3923 			/*
3924 			 * Give subsystem the new rates. They'll return the
3925 			 * appropriate rates pointer for us to garbage collect.
3926 			 */
3927 			err = subsys_cb(TPL_SR_PUT, &rates, &nrates,
3928 			    subsys_ctx);
3929 		}
3930 	}
3931 	stats_free(rates);
3932 
3933 done:
3934 	free(buf, M_TEMP);
3935 	free(subsys_ctx, M_TEMP);
3936 	return (err);
3937 }
3938 
/* Declare the "stats" sysctl node beneath the _kern parent. */
SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "stats(9) MIB");

/*
 * Declare the read-only string sysctl "templates" beneath the stats node; its
 * value is produced by stats_tpl_list_available().
 */
SYSCTL_PROC(_kern_stats, OID_AUTO, templates,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    stats_tpl_list_available, "A",
    "list the name/hash of all available stats(9) templates");
3947 #else /* ! _KERNEL */
3948 
/*
 * Non-kernel builds only: constructor function which initialises the template
 * list rwlock with default attributes.
 * NOTE(review): the return value of pthread_rwlock_init() is not checked.
 */
static void __attribute__ ((constructor))
stats_constructor(void)
{

	pthread_rwlock_init(&tpllistlock, NULL);
}
3955 
/*
 * Non-kernel builds only: destructor function which destroys the template
 * list rwlock set up by stats_constructor().
 */
static void __attribute__ ((destructor))
stats_destructor(void)
{

	pthread_rwlock_destroy(&tpllistlock);
}
3962 
3963 #endif /* _KERNEL */
3964