1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * CDDL HEADER START
3eda14cbcSMatt Macy  *
4eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7eda14cbcSMatt Macy  *
8eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*271171e0SMartin Matuska  * or https://opensource.org/licenses/CDDL-1.0.
10eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11eda14cbcSMatt Macy  * and limitations under the License.
12eda14cbcSMatt Macy  *
13eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18eda14cbcSMatt Macy  *
19eda14cbcSMatt Macy  * CDDL HEADER END
20eda14cbcSMatt Macy  */
21eda14cbcSMatt Macy /*
22eda14cbcSMatt Macy  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23eda14cbcSMatt Macy  * Use is subject to license terms.
24eda14cbcSMatt Macy  */
25eda14cbcSMatt Macy 
26eda14cbcSMatt Macy #ifndef	_SYS_KSTAT_H
27eda14cbcSMatt Macy #define	_SYS_KSTAT_H
28eda14cbcSMatt Macy 
29eda14cbcSMatt Macy 
30eda14cbcSMatt Macy 
31eda14cbcSMatt Macy /*
32eda14cbcSMatt Macy  * Definition of general kernel statistics structures and /dev/kstat ioctls
33eda14cbcSMatt Macy  */
34eda14cbcSMatt Macy 
35eda14cbcSMatt Macy #include <sys/types.h>
36eda14cbcSMatt Macy #include <sys/time.h>
37eda14cbcSMatt Macy 
38eda14cbcSMatt Macy #ifdef	__cplusplus
39eda14cbcSMatt Macy extern "C" {
40eda14cbcSMatt Macy #endif
41eda14cbcSMatt Macy 
/* Integer handle that uniquely identifies a kstat. */
typedef int kid_t;
43eda14cbcSMatt Macy 
44eda14cbcSMatt Macy /*
45eda14cbcSMatt Macy  * Kernel statistics driver (/dev/kstat) ioctls
46eda14cbcSMatt Macy  */
47eda14cbcSMatt Macy 
/*
 * ioctl command numbers for /dev/kstat.  Each command is the 'K' base
 * value (shifted left by 8) OR-ed with a small command index.
 *
 * Every replacement list is fully parenthesized so that a command can be
 * used next to operators that bind tighter than '|' (==, !=, &, +, ...)
 * without the expression being regrouped after macro expansion.
 */
#define	KSTAT_IOC_BASE		('K' << 8)

#define	KSTAT_IOC_CHAIN_ID	(KSTAT_IOC_BASE | 0x01)
#define	KSTAT_IOC_READ		(KSTAT_IOC_BASE | 0x02)
#define	KSTAT_IOC_WRITE		(KSTAT_IOC_BASE | 0x03)
53eda14cbcSMatt Macy 
54eda14cbcSMatt Macy /*
55eda14cbcSMatt Macy  * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor):
56eda14cbcSMatt Macy  *
57eda14cbcSMatt Macy  *	kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL);
58eda14cbcSMatt Macy  *	kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *);
59eda14cbcSMatt Macy  *	kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *);
60eda14cbcSMatt Macy  */
61eda14cbcSMatt Macy 
/*
 * Size of each kstat string buffer: 254 characters plus the terminating
 * NUL.  The value must remain of the form 16 * n - 1.
 */
#define	KSTAT_STRLEN	255
63eda14cbcSMatt Macy 
64eda14cbcSMatt Macy /*
65eda14cbcSMatt Macy  * The generic kstat header
66eda14cbcSMatt Macy  */
67eda14cbcSMatt Macy 
68eda14cbcSMatt Macy typedef struct kstat {
69eda14cbcSMatt Macy 	/*
70eda14cbcSMatt Macy 	 * Fields relevant to both kernel and user
71eda14cbcSMatt Macy 	 */
72eda14cbcSMatt Macy 	hrtime_t	ks_crtime;	/* creation time (from gethrtime()) */
73eda14cbcSMatt Macy 	struct kstat	*ks_next;	/* kstat chain linkage */
74eda14cbcSMatt Macy 	kid_t		ks_kid;		/* unique kstat ID */
75eda14cbcSMatt Macy 	char		ks_module[KSTAT_STRLEN]; /* provider module name */
76eda14cbcSMatt Macy 	uchar_t		ks_resv;	/* reserved, currently just padding */
77eda14cbcSMatt Macy 	int		ks_instance;	/* provider module's instance */
78eda14cbcSMatt Macy 	char		ks_name[KSTAT_STRLEN]; /* kstat name */
79eda14cbcSMatt Macy 	uchar_t		ks_type;	/* kstat data type */
80eda14cbcSMatt Macy 	char		ks_class[KSTAT_STRLEN]; /* kstat class */
81eda14cbcSMatt Macy 	uchar_t		ks_flags;	/* kstat flags */
82eda14cbcSMatt Macy 	void		*ks_data;	/* kstat type-specific data */
83eda14cbcSMatt Macy 	uint_t		ks_ndata;	/* # of type-specific data records */
84eda14cbcSMatt Macy 	size_t		ks_data_size;	/* total size of kstat data section */
85eda14cbcSMatt Macy 	hrtime_t	ks_snaptime;	/* time of last data snapshot */
86eda14cbcSMatt Macy 	/*
87eda14cbcSMatt Macy 	 * Fields relevant to kernel only
88eda14cbcSMatt Macy 	 */
89eda14cbcSMatt Macy 	int		(*ks_update)(struct kstat *, int); /* dynamic update */
90eda14cbcSMatt Macy 	void		*ks_private;	/* arbitrary provider-private data */
91eda14cbcSMatt Macy 	int		(*ks_snapshot)(struct kstat *, void *, int);
92eda14cbcSMatt Macy 	void		*ks_lock;	/* protects this kstat's data */
93eda14cbcSMatt Macy } kstat_t;
94eda14cbcSMatt Macy 
95eda14cbcSMatt Macy /*
96eda14cbcSMatt Macy  * kstat structure and locking strategy
97eda14cbcSMatt Macy  *
98eda14cbcSMatt Macy  * Each kstat consists of a header section (a kstat_t) and a data section.
99eda14cbcSMatt Macy  * The system maintains a set of kstats, protected by kstat_chain_lock.
100eda14cbcSMatt Macy  * kstat_chain_lock protects all additions to/deletions from this set,
101eda14cbcSMatt Macy  * as well as all changes to kstat headers.  kstat data sections are
102eda14cbcSMatt Macy  * *optionally* protected by the per-kstat ks_lock.  If ks_lock is non-NULL,
103eda14cbcSMatt Macy  * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their
104eda14cbcSMatt Macy  * operations on that kstat.  It is up to the kstat provider to decide whether
105eda14cbcSMatt Macy  * guaranteeing consistent data to kstat clients is sufficiently important
106eda14cbcSMatt Macy  * to justify the locking cost.  Note, however, that most statistic updates
107eda14cbcSMatt Macy  * already occur under one of the provider's mutexes, so if the provider sets
108eda14cbcSMatt Macy  * ks_lock to point to that mutex, then kstat data locking is free.
109eda14cbcSMatt Macy  *
110eda14cbcSMatt Macy  * NOTE: variable-size kstats MUST employ kstat data locking, to prevent
111eda14cbcSMatt Macy  * data-size races with kstat clients.
112eda14cbcSMatt Macy  *
113eda14cbcSMatt Macy  * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *)
114eda14cbcSMatt Macy  * in the kstat header so that users don't have to be exposed to all of the
115eda14cbcSMatt Macy  * kernel's lock-related data structures.
116eda14cbcSMatt Macy  */
117eda14cbcSMatt Macy 
#if	defined(_KERNEL)

/*
 * Acquire / release the optional per-kstat data lock.  ks_lock is kept as
 * a (void *) in the kstat header; it is converted to a kmutex_t * here and
 * nothing is done when it is NULL.
 *
 * Both macros expand to a single do { } while (0) statement so that
 * "if (cond) KSTAT_ENTER(k); else ..." parses as intended.  A bare brace
 * block followed by ';' would terminate the if and orphan the else.
 */
#define	KSTAT_ENTER(k)						\
	do {							\
		kmutex_t *lp = (k)->ks_lock;			\
		if (lp)						\
			mutex_enter(lp);			\
	} while (0)

#define	KSTAT_EXIT(k)						\
	do {							\
		kmutex_t *lp = (k)->ks_lock;			\
		if (lp)						\
			mutex_exit(lp);				\
	} while (0)

/* Call the kstat's ks_update routine; rw is KSTAT_READ or KSTAT_WRITE. */
#define	KSTAT_UPDATE(k, rw)		(*(k)->ks_update)((k), (rw))

/* Call the kstat's ks_snapshot routine with buffer buf and direction rw. */
#define	KSTAT_SNAPSHOT(k, buf, rw)	(*(k)->ks_snapshot)((k), (buf), (rw))

#endif	/* defined(_KERNEL) */
131eda14cbcSMatt Macy 
132eda14cbcSMatt Macy /*
133eda14cbcSMatt Macy  * kstat time
134eda14cbcSMatt Macy  *
135eda14cbcSMatt Macy  * All times associated with kstats (e.g. creation time, snapshot time,
136eda14cbcSMatt Macy  * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values,
137eda14cbcSMatt Macy  * as returned by gethrtime().  The accuracy of these timestamps is machine
138eda14cbcSMatt Macy  * dependent, but the precision (units) is the same across all platforms.
139eda14cbcSMatt Macy  */
140eda14cbcSMatt Macy 
141eda14cbcSMatt Macy /*
142eda14cbcSMatt Macy  * kstat identity (KID)
143eda14cbcSMatt Macy  *
144eda14cbcSMatt Macy  * Each kstat is assigned a unique KID (kstat ID) when it is added to the
145eda14cbcSMatt Macy  * global kstat chain.  The KID is used as a cookie by /dev/kstat to
146eda14cbcSMatt Macy  * request information about the corresponding kstat.  There is also
147eda14cbcSMatt Macy  * an identity associated with the entire kstat chain, kstat_chain_id,
148eda14cbcSMatt Macy  * which is bumped each time a kstat is added or deleted.  /dev/kstat uses
149eda14cbcSMatt Macy  * the chain ID to detect changes in the kstat chain (e.g., a new disk
150eda14cbcSMatt Macy  * coming online) between ioctl()s.
151eda14cbcSMatt Macy  */
152eda14cbcSMatt Macy 
153eda14cbcSMatt Macy /*
154eda14cbcSMatt Macy  * kstat module, kstat instance
155eda14cbcSMatt Macy  *
156eda14cbcSMatt Macy  * ks_module and ks_instance contain the name and instance of the module
157eda14cbcSMatt Macy  * that created the kstat.  In cases where there can only be one instance,
158eda14cbcSMatt Macy  * ks_instance is 0.  The kernel proper (/kernel/unix) uses "unix" as its
159eda14cbcSMatt Macy  * module name.
160eda14cbcSMatt Macy  */
161eda14cbcSMatt Macy 
162eda14cbcSMatt Macy /*
163eda14cbcSMatt Macy  * kstat name
164eda14cbcSMatt Macy  *
165eda14cbcSMatt Macy  * ks_name gives a meaningful name to a kstat.  The full kstat namespace
166eda14cbcSMatt Macy  * is module.instance.name, so the name only need be unique within a
167eda14cbcSMatt Macy  * module.  kstat_create() will fail if you try to create a kstat with
168eda14cbcSMatt Macy  * an already-used (ks_module, ks_instance, ks_name) triplet.  Spaces are
169eda14cbcSMatt Macy  * allowed in kstat names, but strongly discouraged, since they hinder
170eda14cbcSMatt Macy  * awk-style processing at user level.
171eda14cbcSMatt Macy  */
172eda14cbcSMatt Macy 
173eda14cbcSMatt Macy /*
174eda14cbcSMatt Macy  * kstat type
175eda14cbcSMatt Macy  *
176eda14cbcSMatt Macy  * The kstat mechanism provides several flavors of kstat data, defined
177eda14cbcSMatt Macy  * below.  The "raw" kstat type is just treated as an array of bytes; you
178eda14cbcSMatt Macy  * can use this to export any kind of data you want.
179eda14cbcSMatt Macy  *
180eda14cbcSMatt Macy  * Some kstat types allow multiple data structures per kstat, e.g.
181eda14cbcSMatt Macy  * KSTAT_TYPE_NAMED; others do not.  This is part of the spec for each
182eda14cbcSMatt Macy  * kstat data type.
183eda14cbcSMatt Macy  *
184eda14cbcSMatt Macy  * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES.  To
185eda14cbcSMatt Macy  * get this information, read out the standard system kstat "kstat_types".
186eda14cbcSMatt Macy  */
187eda14cbcSMatt Macy 
/*
 * kstat data types (stored in ks_type).  The ks_ndata constraint for each
 * type is given alongside it.
 */
#define	KSTAT_TYPE_RAW		0	/* can be anything; ks_ndata >= 1 */
#define	KSTAT_TYPE_NAMED	1	/* name/value pair; ks_ndata >= 1 */
#define	KSTAT_TYPE_INTR		2	/* interrupt stats; ks_ndata == 1 */
#define	KSTAT_TYPE_IO		3	/* I/O statistics; ks_ndata == 1 */
#define	KSTAT_TYPE_TIMER	4	/* event timer; ks_ndata >= 1 */

/* Number of kstat types defined above. */
#define	KSTAT_NUM_TYPES		5
200eda14cbcSMatt Macy 
201eda14cbcSMatt Macy /*
202eda14cbcSMatt Macy  * kstat class
203eda14cbcSMatt Macy  *
204eda14cbcSMatt Macy  * Each kstat can be characterized as belonging to some broad class
205eda14cbcSMatt Macy  * of statistics, e.g. disk, tape, net, vm, streams, etc.  This field
206eda14cbcSMatt Macy  * can be used as a filter to extract related kstats.  The following
207eda14cbcSMatt Macy  * values are currently in use: disk, tape, net, controller, vm, kvm,
208eda14cbcSMatt Macy  * hat, streams, kstat, and misc.  (The kstat class encompasses things
209eda14cbcSMatt Macy  * like kstat_types.)
210eda14cbcSMatt Macy  */
211eda14cbcSMatt Macy 
212eda14cbcSMatt Macy /*
213eda14cbcSMatt Macy  * kstat flags
214eda14cbcSMatt Macy  *
215eda14cbcSMatt Macy  * Any of the following flags may be passed to kstat_create().  They are
216eda14cbcSMatt Macy  * all zero by default.
217eda14cbcSMatt Macy  *
218eda14cbcSMatt Macy  *	KSTAT_FLAG_VIRTUAL:
219eda14cbcSMatt Macy  *
220eda14cbcSMatt Macy  *		Tells kstat_create() not to allocate memory for the
221eda14cbcSMatt Macy  *		kstat data section; instead, you will set the ks_data
222eda14cbcSMatt Macy  *		field to point to the data you wish to export.  This
223eda14cbcSMatt Macy  *		provides a convenient way to export existing data
224eda14cbcSMatt Macy  *		structures.
225eda14cbcSMatt Macy  *
226eda14cbcSMatt Macy  *	KSTAT_FLAG_VAR_SIZE:
227eda14cbcSMatt Macy  *
228eda14cbcSMatt Macy  *		The size of the kstat you are creating will vary over time.
229eda14cbcSMatt Macy  *		For example, you may want to use the kstat mechanism to
230eda14cbcSMatt Macy  *		export a linked list.  NOTE: The kstat framework does not
231eda14cbcSMatt Macy  *		manage the data section, so all variable-size kstats must be
232eda14cbcSMatt Macy  *		virtual kstats.  Moreover, variable-size kstats MUST employ
233eda14cbcSMatt Macy  *		kstat data locking to prevent data-size races with kstat
234eda14cbcSMatt Macy  *		clients.  See the section on "kstat snapshot" for details.
235eda14cbcSMatt Macy  *
236eda14cbcSMatt Macy  *	KSTAT_FLAG_WRITABLE:
237eda14cbcSMatt Macy  *
238eda14cbcSMatt Macy  *		Makes the kstat's data section writable by root.
239eda14cbcSMatt Macy  *		The ks_snapshot routine (see below) does not need to check for
240eda14cbcSMatt Macy  *		this; permission checking is handled in the kstat driver.
241eda14cbcSMatt Macy  *
242eda14cbcSMatt Macy  *	KSTAT_FLAG_PERSISTENT:
243eda14cbcSMatt Macy  *
244eda14cbcSMatt Macy  *		Indicates that this kstat is to be persistent over time.
245eda14cbcSMatt Macy  *		For persistent kstats, kstat_delete() simply marks the
246eda14cbcSMatt Macy  *		kstat as dormant; a subsequent kstat_create() reactivates
247eda14cbcSMatt Macy  *		the kstat.  This feature is provided so that statistics
248eda14cbcSMatt Macy  *		are not lost across driver close/open (e.g., raw disk I/O
249eda14cbcSMatt Macy  *		on a disk with no mounted partitions.)
250eda14cbcSMatt Macy  *		NOTE: Persistent kstats cannot be virtual, since ks_data
251eda14cbcSMatt Macy  *		points to garbage as soon as the driver goes away.
252eda14cbcSMatt Macy  *
253eda14cbcSMatt Macy  * The following flags are maintained by the kstat framework:
254eda14cbcSMatt Macy  *
255eda14cbcSMatt Macy  *	KSTAT_FLAG_DORMANT:
256eda14cbcSMatt Macy  *
257eda14cbcSMatt Macy  *		For persistent kstats, indicates that the kstat is in the
258eda14cbcSMatt Macy  *		dormant state (e.g., the corresponding device is closed).
259eda14cbcSMatt Macy  *
260eda14cbcSMatt Macy  *	KSTAT_FLAG_INVALID:
261eda14cbcSMatt Macy  *
262eda14cbcSMatt Macy  *		This flag is set when a kstat is in a transitional state,
263eda14cbcSMatt Macy  *		e.g. between kstat_create() and kstat_install().
264eda14cbcSMatt Macy  *		kstat clients must not attempt to access the kstat's data
265eda14cbcSMatt Macy  *		if this flag is set.
266eda14cbcSMatt Macy  */
267eda14cbcSMatt Macy 
/* Flags a provider may pass to kstat_create(). */
#define	KSTAT_FLAG_VIRTUAL		0x01	/* provider supplies ks_data */
#define	KSTAT_FLAG_VAR_SIZE		0x02	/* data size varies over time */
#define	KSTAT_FLAG_WRITABLE		0x04	/* data writable by root */
#define	KSTAT_FLAG_PERSISTENT		0x08	/* delete only marks dormant */

/* Flags maintained by the kstat framework. */
#define	KSTAT_FLAG_DORMANT		0x10	/* persistent kstat is dormant */
#define	KSTAT_FLAG_INVALID		0x20	/* transitional; do not read data */

/*
 * NOTE(review): the two flags below are not covered by the flag
 * descriptions in this header -- confirm their semantics against the
 * implementation before relying on them.
 */
#define	KSTAT_FLAG_LONGSTRINGS		0x40
#define	KSTAT_FLAG_NO_HEADERS		0x80
276eda14cbcSMatt Macy 
277eda14cbcSMatt Macy /*
278eda14cbcSMatt Macy  * Dynamic update support
279eda14cbcSMatt Macy  *
280eda14cbcSMatt Macy  * The kstat mechanism allows for an optional ks_update function to update
281eda14cbcSMatt Macy  * kstat data.  This is useful for drivers where the underlying device
282eda14cbcSMatt Macy  * keeps cheap hardware stats, but extraction is expensive.  Instead of
283eda14cbcSMatt Macy  * constantly keeping the kstat data section up to date, you can supply a
284eda14cbcSMatt Macy  * ks_update function which updates the kstat's data section on demand.
285eda14cbcSMatt Macy  * To take advantage of this feature, simply set the ks_update field before
286eda14cbcSMatt Macy  * calling kstat_install().
287eda14cbcSMatt Macy  *
288eda14cbcSMatt Macy  * The ks_update function, if supplied, must have the following structure:
289eda14cbcSMatt Macy  *
290eda14cbcSMatt Macy  *	int
291eda14cbcSMatt Macy  *	foo_kstat_update(kstat_t *ksp, int rw)
292eda14cbcSMatt Macy  *	{
293eda14cbcSMatt Macy  *		if (rw == KSTAT_WRITE) {
294eda14cbcSMatt Macy  *			... update the native stats from ksp->ks_data;
295eda14cbcSMatt Macy  *				return EACCES if you don't support this
296eda14cbcSMatt Macy  *		} else {
297eda14cbcSMatt Macy  *			... update ksp->ks_data from the native stats
298eda14cbcSMatt Macy  *		}
299eda14cbcSMatt Macy  *	}
300eda14cbcSMatt Macy  *
301eda14cbcSMatt Macy  * The ks_update return codes are: 0 for success, EACCES if you don't allow
302eda14cbcSMatt Macy  * KSTAT_WRITE, and EIO for any other type of error.
303eda14cbcSMatt Macy  *
304eda14cbcSMatt Macy  * In general, the ks_update function may need to refer to provider-private
305eda14cbcSMatt Macy  * data; for example, it may need a pointer to the provider's raw statistics.
306eda14cbcSMatt Macy  * The ks_private field is available for this purpose.  Its use is entirely
307eda14cbcSMatt Macy  * at the provider's discretion.
308eda14cbcSMatt Macy  *
309eda14cbcSMatt Macy  * All variable-size kstats MUST supply a ks_update routine, which computes
310eda14cbcSMatt Macy  * and sets ks_data_size (and ks_ndata if that is meaningful), since these
311eda14cbcSMatt Macy  * are needed to perform kstat snapshots (see below).
312eda14cbcSMatt Macy  *
313eda14cbcSMatt Macy  * No kstat locking should be done inside the ks_update routine.  The caller
314eda14cbcSMatt Macy  * will already be holding the kstat's ks_lock (to ensure consistent data).
315eda14cbcSMatt Macy  */
316eda14cbcSMatt Macy 
/* Direction argument ("rw") handed to ks_update and ks_snapshot routines. */
#define	KSTAT_READ	0	/* refresh/copy out the kstat's data */
#define	KSTAT_WRITE	1	/* apply caller-supplied data to the kstat */
319eda14cbcSMatt Macy 
320eda14cbcSMatt Macy /*
321eda14cbcSMatt Macy  * Kstat snapshot
322eda14cbcSMatt Macy  *
323eda14cbcSMatt Macy  * In order to get a consistent view of a kstat's data, clients must obey
324eda14cbcSMatt Macy  * the kstat's locking strategy.  However, these clients may need to perform
325eda14cbcSMatt Macy  * operations on the data which could cause a fault (e.g. copyout()), or
326eda14cbcSMatt Macy  * operations which are simply expensive.  Doing so could cause deadlock
327eda14cbcSMatt Macy  * (e.g. if you're holding a disk's kstat lock which is ultimately required
328eda14cbcSMatt Macy  * to resolve a copyout() fault), performance degradation (since the providers'
329eda14cbcSMatt Macy  * activity is serialized at the kstat lock), device timing problems, etc.
330eda14cbcSMatt Macy  *
331eda14cbcSMatt Macy  * To avoid these problems, kstat data is provided via snapshots.  Taking
332eda14cbcSMatt Macy  * a snapshot is a simple process: allocate a wired-down kernel buffer,
333eda14cbcSMatt Macy  * acquire the kstat's data lock, copy the data into the buffer ("take the
334eda14cbcSMatt Macy  * snapshot"), and release the lock.  This ensures that the kstat's data lock
335eda14cbcSMatt Macy  * will be held as briefly as possible, and that no faults will occur while
336eda14cbcSMatt Macy  * the lock is held.
337eda14cbcSMatt Macy  *
338eda14cbcSMatt Macy  * Normally, the snapshot is taken by default_kstat_snapshot(), which
339eda14cbcSMatt Macy  * timestamps the data (sets ks_snaptime), copies it, and does a little
340eda14cbcSMatt Macy  * massaging to deal with incomplete transactions on i/o kstats.  However,
341eda14cbcSMatt Macy  * this routine only works for kstats with contiguous data (the typical case).
342eda14cbcSMatt Macy  * If you create a kstat whose data is, say, a linked list, you must provide
343eda14cbcSMatt Macy  * your own ks_snapshot routine.  The routine you supply must have the
344eda14cbcSMatt Macy  * following prototype (replace "foo" with something appropriate):
345eda14cbcSMatt Macy  *
346eda14cbcSMatt Macy  *	int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
347eda14cbcSMatt Macy  *
348eda14cbcSMatt Macy  * The minimal snapshot routine -- one which copies contiguous data that
349eda14cbcSMatt Macy  * doesn't need any massaging -- would be this:
350eda14cbcSMatt Macy  *
351eda14cbcSMatt Macy  *	ksp->ks_snaptime = gethrtime();
352eda14cbcSMatt Macy  *	if (rw == KSTAT_WRITE)
353da5137abSMartin Matuska  *		memcpy(ksp->ks_data, buf, ksp->ks_data_size);
354eda14cbcSMatt Macy  *	else
355da5137abSMartin Matuska  *		memcpy(buf, ksp->ks_data, ksp->ks_data_size);
356eda14cbcSMatt Macy  *	return (0);
357eda14cbcSMatt Macy  *
358eda14cbcSMatt Macy  * A more illuminating example is taking a snapshot of a linked list:
359eda14cbcSMatt Macy  *
360eda14cbcSMatt Macy  *	ksp->ks_snaptime = gethrtime();
361eda14cbcSMatt Macy  *	if (rw == KSTAT_WRITE)
362eda14cbcSMatt Macy  *		return (EACCES);		... See below ...
363eda14cbcSMatt Macy  *	for (foo = first_foo; foo; foo = foo->next) {
364da5137abSMartin Matuska  *		memcpy(buf, foo, sizeof (struct foo));
365eda14cbcSMatt Macy  *		buf = ((struct foo *) buf) + 1;
366eda14cbcSMatt Macy  *	}
367eda14cbcSMatt Macy  *	return (0);
368eda14cbcSMatt Macy  *
369eda14cbcSMatt Macy  * In the example above, we have decided that we don't want to allow
370eda14cbcSMatt Macy  * KSTAT_WRITE access, so we return EACCES if this is attempted.
371eda14cbcSMatt Macy  *
372eda14cbcSMatt Macy  * The key points are:
373eda14cbcSMatt Macy  *
374eda14cbcSMatt Macy  *	(1) ks_snaptime must be set (via gethrtime()) to timestamp the data.
375eda14cbcSMatt Macy  *	(2) Data gets copied from the kstat to the buffer on KSTAT_READ,
376eda14cbcSMatt Macy  *		and from the buffer to the kstat on KSTAT_WRITE.
377eda14cbcSMatt Macy  *	(3) ks_snapshot return values are: 0 for success, EACCES if you
378eda14cbcSMatt Macy  *		don't allow KSTAT_WRITE, and EIO for any other type of error.
379eda14cbcSMatt Macy  *
380eda14cbcSMatt Macy  * Named kstats (see section on "Named statistics" below) containing long
381eda14cbcSMatt Macy  * strings (KSTAT_DATA_STRING) need special handling.  The kstat driver
382eda14cbcSMatt Macy  * assumes that all strings are copied into the buffer after the array of
383eda14cbcSMatt Macy  * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point
384eda14cbcSMatt Macy  * into the copy within the buffer. The default snapshot routine does this,
385eda14cbcSMatt Macy  * but overriding routines should contain at least the following:
386eda14cbcSMatt Macy  *
 * if (rw == KSTAT_READ) {
 *	kstat_named_t *knp = buf;
 *	char *end = (char *)(knp + ksp->ks_ndata);
 *	uint_t i;
 *
 *	... Do the regular copy ...
 *	memcpy(buf, ksp->ks_data, sizeof (kstat_named_t) * ksp->ks_ndata);
 *
 *	for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 *		if (knp->data_type == KSTAT_DATA_STRING &&
 *		    KSTAT_NAMED_STR_PTR(knp) != NULL) {
 *			memcpy(end, KSTAT_NAMED_STR_PTR(knp),
 *			    KSTAT_NAMED_STR_BUFLEN(knp));
 *			KSTAT_NAMED_STR_PTR(knp) = end;
 *			end += KSTAT_NAMED_STR_BUFLEN(knp);
 *		}
 *	}
 * }
404eda14cbcSMatt Macy  */
405eda14cbcSMatt Macy 
406eda14cbcSMatt Macy /*
407eda14cbcSMatt Macy  * Named statistics.
408eda14cbcSMatt Macy  *
409eda14cbcSMatt Macy  * List of arbitrary name=value statistics.
410eda14cbcSMatt Macy  */
411eda14cbcSMatt Macy 
412eda14cbcSMatt Macy typedef struct kstat_named {
413eda14cbcSMatt Macy 	char	name[KSTAT_STRLEN];	/* name of counter */
414eda14cbcSMatt Macy 	uchar_t	data_type;		/* data type */
415eda14cbcSMatt Macy 	union {
416eda14cbcSMatt Macy 		char		c[16];	/* enough for 128-bit ints */
417eda14cbcSMatt Macy 		int32_t		i32;
418eda14cbcSMatt Macy 		uint32_t	ui32;
419eda14cbcSMatt Macy 		struct {
420eda14cbcSMatt Macy 			union {
421eda14cbcSMatt Macy 				char 		*ptr;	/* NULL-term string */
422eda14cbcSMatt Macy #if defined(_KERNEL) && defined(_MULTI_DATAMODEL)
423eda14cbcSMatt Macy 				caddr32_t	ptr32;
424eda14cbcSMatt Macy #endif
425eda14cbcSMatt Macy 				char 		__pad[8]; /* 64-bit padding */
426eda14cbcSMatt Macy 			} addr;
427eda14cbcSMatt Macy 			uint32_t	len;	/* # bytes for strlen + '\0' */
428eda14cbcSMatt Macy 		} str;
429eda14cbcSMatt Macy /*
430eda14cbcSMatt Macy  * The int64_t and uint64_t types are not valid for a maximally conformant
431eda14cbcSMatt Macy  * 32-bit compilation environment (cc -Xc) using compilers prior to the
432eda14cbcSMatt Macy  * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990).
433eda14cbcSMatt Macy  * In these cases, the visibility of i64 and ui64 is only permitted for
434eda14cbcSMatt Macy  * 64-bit compilation environments or 32-bit non-maximally conformant
435eda14cbcSMatt Macy  * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the
436eda14cbcSMatt Macy  * C99 ANSI C compilation environment, the long long type is supported.
437e3aa18adSMartin Matuska  * The _INT64_TYPE is defined by the implementation (see sys/inttypes.h).
438eda14cbcSMatt Macy  */
439eda14cbcSMatt Macy #if defined(_INT64_TYPE)
440eda14cbcSMatt Macy 		int64_t		i64;
441eda14cbcSMatt Macy 		uint64_t	ui64;
442eda14cbcSMatt Macy #endif
443eda14cbcSMatt Macy 		long		l;
444eda14cbcSMatt Macy 		ulong_t		ul;
445eda14cbcSMatt Macy 
446eda14cbcSMatt Macy 		/* These structure members are obsolete */
447eda14cbcSMatt Macy 
448eda14cbcSMatt Macy 		longlong_t	ll;
449eda14cbcSMatt Macy 		u_longlong_t	ull;
450eda14cbcSMatt Macy 		float		f;
451eda14cbcSMatt Macy 		double		d;
452eda14cbcSMatt Macy 	} value;			/* value of counter */
453eda14cbcSMatt Macy } kstat_named_t;
454eda14cbcSMatt Macy 
/* Data type codes for a named kstat's data_type field. */
#define	KSTAT_DATA_CHAR		0
#define	KSTAT_DATA_INT32	1
#define	KSTAT_DATA_UINT32	2
#define	KSTAT_DATA_INT64	3
#define	KSTAT_DATA_UINT64	4

/*
 * LONG/ULONG resolve according to the data model:
 *   - not _LP64:            aliases of the 32-bit codes
 *   - _LP64, not _KERNEL:   aliases of the 64-bit codes
 *   - _LP64 and _KERNEL:    distinct codes 7 and 8
 */
#if !defined(_LP64)
#define	KSTAT_DATA_LONG		KSTAT_DATA_INT32
#define	KSTAT_DATA_ULONG	KSTAT_DATA_UINT32
#elif !defined(_KERNEL)
#define	KSTAT_DATA_LONG		KSTAT_DATA_INT64
#define	KSTAT_DATA_ULONG	KSTAT_DATA_UINT64
#else
#define	KSTAT_DATA_LONG		7	/* only visible to the kernel */
#define	KSTAT_DATA_ULONG	8	/* only visible to the kernel */
#endif	/* !_LP64 / !_KERNEL */
473eda14cbcSMatt Macy 
474eda14cbcSMatt Macy /*
475eda14cbcSMatt Macy  * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING)
476eda14cbcSMatt Macy  * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof
477eda14cbcSMatt Macy  * (kstat_named_t)).  ks_data_size in these cases is equal to the sum of the
478eda14cbcSMatt Macy  * amount of space required to store the strings (ie, the sum of
479eda14cbcSMatt Macy  * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the
480eda14cbcSMatt Macy  * space required to store the kstat_named_t's.
481eda14cbcSMatt Macy  *
482eda14cbcSMatt Macy  * The default update routine will update ks_data_size automatically for
483eda14cbcSMatt Macy  * variable-length kstats containing long strings (using the default update
484eda14cbcSMatt Macy  * routine only makes sense if the string is the only thing that is changing
485eda14cbcSMatt Macy  * in size, and ks_ndata is constant).  Fixed-length kstats containing long
486eda14cbcSMatt Macy  * strings must explicitly change ks_data_size (after creation but before
487eda14cbcSMatt Macy  * initialization) to reflect the correct amount of space required for the
488eda14cbcSMatt Macy  * long strings and the kstat_named_t's.
489eda14cbcSMatt Macy  */
/* Named-kstat data type code for long strings. */
#define	KSTAT_DATA_STRING	9

/*
 * Obsolete data type codes.  LONGLONG/ULONGLONG are plain aliases of the
 * 64-bit integer codes; FLOAT and DOUBLE have codes of their own.
 */
#define	KSTAT_DATA_LONGLONG	KSTAT_DATA_INT64
#define	KSTAT_DATA_ULONGLONG	KSTAT_DATA_UINT64
#define	KSTAT_DATA_FLOAT	5
#define	KSTAT_DATA_DOUBLE	6
498eda14cbcSMatt Macy 
/* Yield a kstat's ks_data pointer converted to (kstat_named_t *). */
#define	KSTAT_NAMED_PTR(ksp)	((kstat_named_t *)(ksp)->ks_data)

/*
 * String pointer held by the given named kstat.  Expands to the member
 * itself, so it may be assigned to as well as read.
 */
#define	KSTAT_NAMED_STR_PTR(knp) ((knp)->value.str.addr.ptr)

/*
 * Length of the buffer needed to hold the string of the given named kstat.
 */
#define	KSTAT_NAMED_STR_BUFLEN(knp) ((knp)->value.str.len)
511eda14cbcSMatt Macy 
512eda14cbcSMatt Macy /*
513eda14cbcSMatt Macy  * Interrupt statistics.
514eda14cbcSMatt Macy  *
515eda14cbcSMatt Macy  * An interrupt is a hard interrupt (sourced from the hardware device
516eda14cbcSMatt Macy  * itself), a soft interrupt (induced by the system via the use of
517eda14cbcSMatt Macy  * some system interrupt source), a watchdog interrupt (induced by
518eda14cbcSMatt Macy  * a periodic timer call), spurious (an interrupt entry point was
519eda14cbcSMatt Macy  * entered but there was no interrupt condition to service),
520eda14cbcSMatt Macy  * or multiple service (an interrupt condition was detected and
521eda14cbcSMatt Macy  * serviced just prior to returning from any of the other types).
522eda14cbcSMatt Macy  *
523eda14cbcSMatt Macy  * Measurement of the spurious class of interrupts is useful for
524eda14cbcSMatt Macy  * autovectored devices in order to pinpoint any interrupt latency
525eda14cbcSMatt Macy  * problems in a particular system configuration.
526eda14cbcSMatt Macy  *
527eda14cbcSMatt Macy  * Devices that have more than one interrupt of the same
528eda14cbcSMatt Macy  * type should use multiple structures.
529eda14cbcSMatt Macy  */
530eda14cbcSMatt Macy 
/* Interrupt classes counted by an interrupt kstat. */
#define	KSTAT_INTR_HARD			0	/* from the hardware device */
#define	KSTAT_INTR_SOFT			1	/* induced by the system */
#define	KSTAT_INTR_WATCHDOG		2	/* periodic timer call */
#define	KSTAT_INTR_SPURIOUS		3	/* nothing to service */
#define	KSTAT_INTR_MULTSVC		4	/* multiple service */

/* Number of interrupt classes defined above. */
#define	KSTAT_NUM_INTRS			5
538eda14cbcSMatt Macy 
539eda14cbcSMatt Macy typedef struct kstat_intr {
540eda14cbcSMatt Macy 	uint_t	intrs[KSTAT_NUM_INTRS];	/* interrupt counters */
541eda14cbcSMatt Macy } kstat_intr_t;
542eda14cbcSMatt Macy 
543eda14cbcSMatt Macy #define	KSTAT_INTR_PTR(kptr)	((kstat_intr_t *)(kptr)->ks_data)
544eda14cbcSMatt Macy 
545eda14cbcSMatt Macy /*
546eda14cbcSMatt Macy  * I/O statistics.
547eda14cbcSMatt Macy  */
548eda14cbcSMatt Macy 
549eda14cbcSMatt Macy typedef struct kstat_io {
550eda14cbcSMatt Macy 
551eda14cbcSMatt Macy 	/*
552eda14cbcSMatt Macy 	 * Basic counters.
553eda14cbcSMatt Macy 	 *
554eda14cbcSMatt Macy 	 * The counters should be updated at the end of service
555eda14cbcSMatt Macy 	 * (e.g., just prior to calling biodone()).
556eda14cbcSMatt Macy 	 */
557eda14cbcSMatt Macy 
558eda14cbcSMatt Macy 	u_longlong_t	nread;		/* number of bytes read */
559eda14cbcSMatt Macy 	u_longlong_t	nwritten;	/* number of bytes written */
560eda14cbcSMatt Macy 	uint_t		reads;		/* number of read operations */
561eda14cbcSMatt Macy 	uint_t		writes;		/* number of write operations */
562eda14cbcSMatt Macy 
563eda14cbcSMatt Macy 	/*
564eda14cbcSMatt Macy 	 * Accumulated time and queue length statistics.
565eda14cbcSMatt Macy 	 *
566eda14cbcSMatt Macy 	 * Accumulated time statistics are kept as a running sum
567eda14cbcSMatt Macy 	 * of "active" time.  Queue length statistics are kept as a
568eda14cbcSMatt Macy 	 * running sum of the product of queue length and elapsed time
569eda14cbcSMatt Macy 	 * at that length -- i.e., a Riemann sum for queue length
570eda14cbcSMatt Macy 	 * integrated against time.  (You can also think of the active time
571eda14cbcSMatt Macy 	 * as a Riemann sum, for the boolean function (queue_length > 0)
572eda14cbcSMatt Macy 	 * integrated against time, or you can think of it as the
573eda14cbcSMatt Macy 	 * Lebesgue measure of the set on which queue_length > 0.)
574eda14cbcSMatt Macy 	 *
575eda14cbcSMatt Macy 	 *		^
576eda14cbcSMatt Macy 	 *		|			_________
577eda14cbcSMatt Macy 	 *		8			| i4	|
578eda14cbcSMatt Macy 	 *		|			|	|
579eda14cbcSMatt Macy 	 *	Queue	6			|	|
580eda14cbcSMatt Macy 	 *	Length	|	_________	|	|
581eda14cbcSMatt Macy 	 *		4	| i2	|_______|	|
582eda14cbcSMatt Macy 	 *		|	|	    i3		|
583eda14cbcSMatt Macy 	 *		2_______|			|
584eda14cbcSMatt Macy 	 *		|    i1				|
585eda14cbcSMatt Macy 	 *		|_______________________________|
586eda14cbcSMatt Macy 	 *		Time->	t1	t2	t3	t4
587eda14cbcSMatt Macy 	 *
588eda14cbcSMatt Macy 	 * At each change of state (entry or exit from the queue),
589eda14cbcSMatt Macy 	 * we add the elapsed time (since the previous state change)
590eda14cbcSMatt Macy 	 * to the active time if the queue length was non-zero during
591eda14cbcSMatt Macy 	 * that interval; and we add the product of the elapsed time
592eda14cbcSMatt Macy 	 * times the queue length to the running length*time sum.
593eda14cbcSMatt Macy 	 *
594eda14cbcSMatt Macy 	 * This method is generalizable to measuring residency
595eda14cbcSMatt Macy 	 * in any defined system: instead of queue lengths, think
596eda14cbcSMatt Macy 	 * of "outstanding RPC calls to server X".
597eda14cbcSMatt Macy 	 *
598eda14cbcSMatt Macy 	 * A large number of I/O subsystems have at least two basic
599eda14cbcSMatt Macy 	 * "lists" of transactions they manage: one for transactions
600eda14cbcSMatt Macy 	 * that have been accepted for processing but for which processing
601eda14cbcSMatt Macy 	 * has yet to begin, and one for transactions which are actively
602eda14cbcSMatt Macy 	 * being processed (but not done). For this reason, two cumulative
603eda14cbcSMatt Macy 	 * time statistics are defined here: wait (pre-service) time,
604eda14cbcSMatt Macy 	 * and run (service) time.
605eda14cbcSMatt Macy 	 *
606eda14cbcSMatt Macy 	 * All times are 64-bit nanoseconds (hrtime_t), as returned by
607eda14cbcSMatt Macy 	 * gethrtime().
608eda14cbcSMatt Macy 	 *
609eda14cbcSMatt Macy 	 * The units of cumulative busy time are accumulated nanoseconds.
610eda14cbcSMatt Macy 	 * The units of cumulative length*time products are elapsed time
611eda14cbcSMatt Macy 	 * (in nanoseconds) times queue length.
612eda14cbcSMatt Macy 	 *
613eda14cbcSMatt Macy 	 * Updates to the fields below are performed implicitly by calls to
614eda14cbcSMatt Macy 	 * these six functions:
615eda14cbcSMatt Macy 	 *
616eda14cbcSMatt Macy 	 *	kstat_waitq_enter()
617eda14cbcSMatt Macy 	 *	kstat_waitq_exit()
618eda14cbcSMatt Macy 	 *	kstat_runq_enter()
619eda14cbcSMatt Macy 	 *	kstat_runq_exit()
620eda14cbcSMatt Macy 	 *
621eda14cbcSMatt Macy 	 *	kstat_waitq_to_runq()		(see below)
622eda14cbcSMatt Macy 	 *	kstat_runq_back_to_waitq()	(see below)
623eda14cbcSMatt Macy 	 *
624eda14cbcSMatt Macy 	 * Since kstat_waitq_exit() is typically followed immediately
625eda14cbcSMatt Macy 	 * by kstat_runq_enter(), there is a single kstat_waitq_to_runq()
626eda14cbcSMatt Macy 	 * function which performs both operations.  This is a performance
627eda14cbcSMatt Macy 	 * win since only one timestamp is required.
628eda14cbcSMatt Macy 	 *
629eda14cbcSMatt Macy 	 * In some instances, it may be necessary to move a request from
630eda14cbcSMatt Macy 	 * the run queue back to the wait queue, e.g. for write throttling.
631eda14cbcSMatt Macy 	 * For these situations, call kstat_runq_back_to_waitq().
632eda14cbcSMatt Macy 	 *
633eda14cbcSMatt Macy 	 * These fields should never be updated by any other means.
634eda14cbcSMatt Macy 	 */
635eda14cbcSMatt Macy 
636eda14cbcSMatt Macy 	hrtime_t wtime;		/* cumulative wait (pre-service) time */
637eda14cbcSMatt Macy 	hrtime_t wlentime;	/* cumulative wait length*time product */
638eda14cbcSMatt Macy 	hrtime_t wlastupdate;	/* last time wait queue changed */
639eda14cbcSMatt Macy 	hrtime_t rtime;		/* cumulative run (service) time */
640eda14cbcSMatt Macy 	hrtime_t rlentime;	/* cumulative run length*time product */
641eda14cbcSMatt Macy 	hrtime_t rlastupdate;	/* last time run queue changed */
642eda14cbcSMatt Macy 
643eda14cbcSMatt Macy 	uint_t	wcnt;		/* count of elements in wait state */
644eda14cbcSMatt Macy 	uint_t	rcnt;		/* count of elements in run state */
645eda14cbcSMatt Macy 
646eda14cbcSMatt Macy } kstat_io_t;
647eda14cbcSMatt Macy 
/*
 * View a kstat's ks_data pointer as a kstat_io_t *.  This is a plain cast;
 * the macro itself does not check what kind of data ks_data points to.
 */
648eda14cbcSMatt Macy #define	KSTAT_IO_PTR(kptr)	((kstat_io_t *)(kptr)->ks_data)
649eda14cbcSMatt Macy 
650eda14cbcSMatt Macy /*
651eda14cbcSMatt Macy  * Event timer statistics - cumulative elapsed time and number of events.
652eda14cbcSMatt Macy  *
653eda14cbcSMatt Macy  * Updates to these fields are performed implicitly by calls to
654eda14cbcSMatt Macy  * kstat_timer_start() and kstat_timer_stop().
655eda14cbcSMatt Macy  */
656eda14cbcSMatt Macy 
657eda14cbcSMatt Macy typedef struct kstat_timer {
	/*
	 * The hrtime_t fields below use the same type that the kstat_io
	 * comment describes as 64-bit nanoseconds.
	 */
658eda14cbcSMatt Macy 	char		name[KSTAT_STRLEN];	/* event name */
659eda14cbcSMatt Macy 	uchar_t		resv;			/* reserved */
660eda14cbcSMatt Macy 	u_longlong_t	num_events;		/* number of events */
661eda14cbcSMatt Macy 	hrtime_t	elapsed_time;		/* cumulative elapsed time */
662eda14cbcSMatt Macy 	hrtime_t	min_time;		/* shortest event duration */
663eda14cbcSMatt Macy 	hrtime_t	max_time;		/* longest event duration */
664eda14cbcSMatt Macy 	hrtime_t	start_time;		/* previous event start time */
665eda14cbcSMatt Macy 	hrtime_t	stop_time;		/* previous event stop time */
666eda14cbcSMatt Macy } kstat_timer_t;
667eda14cbcSMatt Macy 
/*
 * View a kstat's ks_data pointer as a kstat_timer_t *.  This is a plain cast;
 * the macro itself does not check what kind of data ks_data points to.
 */
668eda14cbcSMatt Macy #define	KSTAT_TIMER_PTR(kptr)	((kstat_timer_t *)(kptr)->ks_data)
669eda14cbcSMatt Macy 
670eda14cbcSMatt Macy #if	defined(_KERNEL)
671eda14cbcSMatt Macy 
672eda14cbcSMatt Macy #include <sys/t_lock.h>
673eda14cbcSMatt Macy 
674eda14cbcSMatt Macy extern kid_t	kstat_chain_id;		/* chain ID; bumped at each state change */
675eda14cbcSMatt Macy extern void	kstat_init(void);	/* initialize the kstat framework */
676eda14cbcSMatt Macy 
677eda14cbcSMatt Macy /*
678eda14cbcSMatt Macy  * Adding and deleting kstats.
679eda14cbcSMatt Macy  *
680eda14cbcSMatt Macy  * The typical sequence to add a kstat is:
681eda14cbcSMatt Macy  *
682eda14cbcSMatt Macy  *	ksp = kstat_create(module, instance, name, class, type, ndata, flags);
683eda14cbcSMatt Macy  *	if (ksp) {
684eda14cbcSMatt Macy  *		... provider initialization, if necessary
685eda14cbcSMatt Macy  *		kstat_install(ksp);
686eda14cbcSMatt Macy  *	}
687eda14cbcSMatt Macy  *
688eda14cbcSMatt Macy  * There are three logically distinct steps here:
689eda14cbcSMatt Macy  *
690eda14cbcSMatt Macy  * Step 1: System Initialization (kstat_create)
691eda14cbcSMatt Macy  *
692eda14cbcSMatt Macy  * kstat_create() performs system initialization.  kstat_create()
693eda14cbcSMatt Macy  * allocates memory for the entire kstat (header plus data), initializes
694eda14cbcSMatt Macy  * all header fields, initializes the data section to all zeroes, assigns
695eda14cbcSMatt Macy  * a unique KID, and puts the kstat onto the system's kstat chain.
696eda14cbcSMatt Macy  * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set),
697eda14cbcSMatt Macy  * because the provider (caller) has not yet had a chance to initialize
698eda14cbcSMatt Macy  * the data section.
699eda14cbcSMatt Macy  *
700eda14cbcSMatt Macy  * By default, kstats are exported to all zones on the system.  A kstat may be
701eda14cbcSMatt Macy  * created via kstat_create_zone() to specify a zone to which the statistics
702eda14cbcSMatt Macy  * should be exported.  kstat_zone_add() may be used to specify additional
703eda14cbcSMatt Macy  * zones to which the statistics are to be exported.
704eda14cbcSMatt Macy  *
705eda14cbcSMatt Macy  * Step 2: Provider Initialization
706eda14cbcSMatt Macy  *
707eda14cbcSMatt Macy  * The provider performs any necessary initialization of the data section,
708eda14cbcSMatt Macy  * e.g. setting the name fields in a KSTAT_TYPE_NAMED.  Virtual kstats set
709eda14cbcSMatt Macy  * the ks_data field at this time.  The provider may also set the ks_update,
710eda14cbcSMatt Macy  * ks_snapshot, ks_private, and ks_lock fields if necessary.
711eda14cbcSMatt Macy  *
712eda14cbcSMatt Macy  * Step 3: Installation (kstat_install)
713eda14cbcSMatt Macy  *
714eda14cbcSMatt Macy  * Once the kstat is completely initialized, kstat_install() clears the
715eda14cbcSMatt Macy  * INVALID flag, thus making the kstat accessible to the outside world.
716eda14cbcSMatt Macy  * kstat_install() also clears the DORMANT flag for persistent kstats.
717eda14cbcSMatt Macy  *
718eda14cbcSMatt Macy  * Removing a kstat from the system
719eda14cbcSMatt Macy  *
720eda14cbcSMatt Macy  * kstat_delete(ksp) removes ksp from the kstat chain and frees all
721eda14cbcSMatt Macy  * associated system resources.  NOTE: When you call kstat_delete(),
722eda14cbcSMatt Macy  * you must NOT be holding that kstat's ks_lock.  Otherwise, you may
723eda14cbcSMatt Macy  * deadlock with a kstat reader.
724eda14cbcSMatt Macy  *
725eda14cbcSMatt Macy  * Persistent kstats
726eda14cbcSMatt Macy  *
727eda14cbcSMatt Macy  * From the provider's point of view, persistence is transparent.  The only
728eda14cbcSMatt Macy  * difference between ephemeral (normal) kstats and persistent kstats
729eda14cbcSMatt Macy  * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create().  Magically,
730eda14cbcSMatt Macy  * this has the effect of making your data visible even when you're
731eda14cbcSMatt Macy  * not home.  Persistence is important to tools like iostat, which want
732eda14cbcSMatt Macy  * to get a meaningful picture of disk activity.  Without persistence,
733eda14cbcSMatt Macy  * raw disk i/o statistics could never accumulate: they would come and
734eda14cbcSMatt Macy  * go with each open/close of the raw device.
735eda14cbcSMatt Macy  *
736eda14cbcSMatt Macy  * The magic of persistence works by slightly altering the behavior of
737eda14cbcSMatt Macy  * kstat_create() and kstat_delete().  The first call to kstat_create()
738eda14cbcSMatt Macy  * creates a new kstat, as usual.  However, kstat_delete() does not
739eda14cbcSMatt Macy  * actually delete the kstat: it performs one final update of the data
740eda14cbcSMatt Macy  * (i.e., calls the ks_update routine), marks the kstat as dormant, and
741eda14cbcSMatt Macy  * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back
742eda14cbcSMatt Macy  * to their default values (since they might otherwise point to garbage,
743eda14cbcSMatt Macy  * e.g. if the provider is going away).  kstat clients can still access
744eda14cbcSMatt Macy  * the dormant kstat just like a live kstat; they just continue to see
745eda14cbcSMatt Macy  * the final data values as long as the kstat remains dormant.
746eda14cbcSMatt Macy  * All subsequent kstat_create() calls simply find the already-existing,
747eda14cbcSMatt Macy  * dormant kstat and return a pointer to it, without altering any fields.
748eda14cbcSMatt Macy  * The provider then performs its usual initialization sequence, and
749eda14cbcSMatt Macy  * calls kstat_install().  kstat_install() uses the old data values to
750eda14cbcSMatt Macy  * initialize the native data (i.e., ks_update is called with KSTAT_WRITE),
751eda14cbcSMatt Macy  * thus making it seem like you were never gone.
752eda14cbcSMatt Macy  */
753eda14cbcSMatt Macy 
/*
 * Creation, installation and deletion (see "Adding and deleting kstats"
 * above).  Per the usage sequence in that comment, the kstat_create()
 * arguments are, in order: module, instance, name, class, type, ndata, flags.
 * kstat_create_zone() has the same leading parameter types plus a zoneid_t
 * naming the zone to which the statistics should be exported.
 */
754eda14cbcSMatt Macy extern kstat_t *kstat_create(const char *, int, const char *, const char *,
755eda14cbcSMatt Macy     uchar_t, uint_t, uchar_t);
756eda14cbcSMatt Macy extern kstat_t *kstat_create_zone(const char *, int, const char *,
757eda14cbcSMatt Macy     const char *, uchar_t, uint_t, uchar_t, zoneid_t);
758eda14cbcSMatt Macy extern void kstat_install(kstat_t *);
759eda14cbcSMatt Macy extern void kstat_delete(kstat_t *);
/*
 * NOTE(review): the string, by-name delete and named-init helpers below are
 * not described in this header; their exact semantics (e.g. the meaning of
 * the unnamed const char * and int parameters) must be confirmed against
 * the implementation.
 */
760eda14cbcSMatt Macy extern void kstat_named_setstr(kstat_named_t *knp, const char *src);
761eda14cbcSMatt Macy extern void kstat_set_string(char *, const char *);
762eda14cbcSMatt Macy extern void kstat_delete_byname(const char *, int, const char *);
763eda14cbcSMatt Macy extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t);
764eda14cbcSMatt Macy extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
/*
 * Event timers.  kstat_timer_start() and kstat_timer_stop() are the calls
 * that implicitly update the fields of a kstat_timer_t (see the comment on
 * that structure).
 */
765eda14cbcSMatt Macy extern void kstat_timer_init(kstat_timer_t *, const char *);
766eda14cbcSMatt Macy extern void kstat_timer_start(kstat_timer_t *);
767eda14cbcSMatt Macy extern void kstat_timer_stop(kstat_timer_t *);
768eda14cbcSMatt Macy 
/*
 * Zone visibility.  kstat_zone_add() specifies an additional zone to which
 * a kstat's statistics are exported (see "Step 1" above).
 * NOTE(review): kstat_zone_remove() and kstat_zone_find() are presumably the
 * inverse operation and a membership query; the meaning of the int returned
 * by kstat_zone_find() is not documented here -- confirm in the implementation.
 */
769eda14cbcSMatt Macy extern void kstat_zone_add(kstat_t *, zoneid_t);
770eda14cbcSMatt Macy extern void kstat_zone_remove(kstat_t *, zoneid_t);
771eda14cbcSMatt Macy extern int kstat_zone_find(kstat_t *, zoneid_t);
772eda14cbcSMatt Macy 
/*
 * Look up a kstat by KID or by name, qualified by a zone ID, and release it.
 * NOTE(review): the hold/rele naming suggests each successful hold must be
 * paired with kstat_rele(), and that a failed lookup returns NULL; neither is
 * stated in this header -- confirm against the implementation.
 */
773eda14cbcSMatt Macy extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t);
774eda14cbcSMatt Macy extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t);
775eda14cbcSMatt Macy extern void kstat_rele(kstat_t *);
776eda14cbcSMatt Macy 
777eda14cbcSMatt Macy #endif	/* defined(_KERNEL) */
778eda14cbcSMatt Macy 
779eda14cbcSMatt Macy #ifdef	__cplusplus
780eda14cbcSMatt Macy }
781eda14cbcSMatt Macy #endif
782eda14cbcSMatt Macy 
783eda14cbcSMatt Macy #endif	/* _SYS_KSTAT_H */
784