xref: /freebsd/sys/contrib/openzfs/module/zfs/fm.c (revision 535af610)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Fault Management Architecture (FMA) Resource and Protocol Support
27  *
28  * The routines contained herein provide services to support kernel subsystems
29  * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
30  *
31  * Name-Value Pair Lists
32  *
33  * The embodiment of an FMA protocol element (event, fmri or authority) is a
34  * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
35  * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
36  * to create an nvpair list using custom allocators.  Callers may choose to
37  * allocate either from the kernel memory allocator, or from a preallocated
38  * buffer, useful in constrained contexts like high-level interrupt routines.
39  *
40  * Protocol Event and FMRI Construction
41  *
42  * Convenience routines are provided to construct nvlist events according to
43  * the FMA Event Protocol and Naming Schema specification for ereports and
44  * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
45  *
46  * ENA Manipulation
47  *
48  * Routines to generate ENA formats 0, 1 and 2 are available as well as
49  * routines to increment formats 1 and 2.  Individual fields within the
50  * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
51  * fm_ena_format_get() and fm_ena_gen_get().
52  */
53 
54 #include <sys/types.h>
55 #include <sys/time.h>
56 #include <sys/list.h>
57 #include <sys/nvpair.h>
58 #include <sys/cmn_err.h>
59 #include <sys/sysmacros.h>
60 #include <sys/sunddi.h>
61 #include <sys/systeminfo.h>
62 #include <sys/fm/util.h>
63 #include <sys/fm/protocol.h>
64 #include <sys/kstat.h>
65 #include <sys/zfs_context.h>
66 #ifdef _KERNEL
67 #include <sys/atomic.h>
68 #include <sys/condvar.h>
69 #include <sys/zfs_ioctl.h>
70 
71 static uint_t zfs_zevent_len_max = 512;
72 
73 static uint_t zevent_len_cur = 0;
74 static int zevent_waiters = 0;
75 static int zevent_flags = 0;
76 
77 /* Num events rate limited since the last time zfs_zevent_next() was called */
78 static uint64_t ratelimit_dropped = 0;
79 
80 /*
81  * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
82  * posted.  The posted EIDs are monotonically increasing but not persistent.
83  * They will be reset to the initial value (1) each time the kernel module is
84  * loaded.
85  */
86 static uint64_t zevent_eid = 0;
87 
88 static kmutex_t zevent_lock;
89 static list_t zevent_list;
90 static kcondvar_t zevent_cv;
91 #endif /* _KERNEL */
92 
93 
94 /*
95  * Common fault management kstats to record event generation failures
96  */
97 
98 struct erpt_kstat {
99 	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
100 	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
101 	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
102 	kstat_named_t	payload_set_failed;	/* num payload set failures */
103 	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
104 };
105 
106 static struct erpt_kstat erpt_kstat_data = {
107 	{ "erpt-dropped", KSTAT_DATA_UINT64 },
108 	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
109 	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
110 	{ "payload-set-failed", KSTAT_DATA_UINT64 },
111 	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
112 };
113 
114 kstat_t *fm_ksp;
115 
116 #ifdef _KERNEL
117 
118 static zevent_t *
119 zfs_zevent_alloc(void)
120 {
121 	zevent_t *ev;
122 
123 	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);
124 
125 	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
126 	    offsetof(zfs_zevent_t, ze_node));
127 	list_link_init(&ev->ev_node);
128 
129 	return (ev);
130 }
131 
132 static void
133 zfs_zevent_free(zevent_t *ev)
134 {
135 	/* Run provided cleanup callback */
136 	ev->ev_cb(ev->ev_nvl, ev->ev_detector);
137 
138 	list_destroy(&ev->ev_ze_list);
139 	kmem_free(ev, sizeof (zevent_t));
140 }
141 
142 static void
143 zfs_zevent_drain(zevent_t *ev)
144 {
145 	zfs_zevent_t *ze;
146 
147 	ASSERT(MUTEX_HELD(&zevent_lock));
148 	list_remove(&zevent_list, ev);
149 
150 	/* Remove references to this event in all private file data */
151 	while ((ze = list_remove_head(&ev->ev_ze_list)) != NULL) {
152 		ze->ze_zevent = NULL;
153 		ze->ze_dropped++;
154 	}
155 
156 	zfs_zevent_free(ev);
157 }
158 
159 void
160 zfs_zevent_drain_all(uint_t *count)
161 {
162 	zevent_t *ev;
163 
164 	mutex_enter(&zevent_lock);
165 	while ((ev = list_head(&zevent_list)) != NULL)
166 		zfs_zevent_drain(ev);
167 
168 	*count = zevent_len_cur;
169 	zevent_len_cur = 0;
170 	mutex_exit(&zevent_lock);
171 }
172 
173 /*
174  * New zevents are inserted at the head.  If the maximum queue
175  * length is exceeded a zevent will be drained from the tail.
176  * As part of this any user space processes which currently have
177  * a reference to this zevent_t in their private data will have
178  * this reference set to NULL.
179  */
180 static void
181 zfs_zevent_insert(zevent_t *ev)
182 {
183 	ASSERT(MUTEX_HELD(&zevent_lock));
184 	list_insert_head(&zevent_list, ev);
185 
186 	if (zevent_len_cur >= zfs_zevent_len_max)
187 		zfs_zevent_drain(list_tail(&zevent_list));
188 	else
189 		zevent_len_cur++;
190 }
191 
192 /*
193  * Post a zevent. The cb will be called when nvl and detector are no longer
194  * needed, i.e.:
195  * - An error happened and a zevent can't be posted. In this case, cb is called
196  *   before zfs_zevent_post() returns.
197  * - The event is being drained and freed.
198  */
199 int
200 zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
201 {
202 	inode_timespec_t tv;
203 	int64_t tv_array[2];
204 	uint64_t eid;
205 	size_t nvl_size = 0;
206 	zevent_t *ev;
207 	int error;
208 
209 	ASSERT(cb != NULL);
210 
211 	gethrestime(&tv);
212 	tv_array[0] = tv.tv_sec;
213 	tv_array[1] = tv.tv_nsec;
214 
215 	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
216 	if (error) {
217 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
218 		goto out;
219 	}
220 
221 	eid = atomic_inc_64_nv(&zevent_eid);
222 	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
223 	if (error) {
224 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
225 		goto out;
226 	}
227 
228 	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
229 	if (error) {
230 		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
231 		goto out;
232 	}
233 
234 	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
235 		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
236 		error = EOVERFLOW;
237 		goto out;
238 	}
239 
240 	ev = zfs_zevent_alloc();
241 	if (ev == NULL) {
242 		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
243 		error = ENOMEM;
244 		goto out;
245 	}
246 
247 	ev->ev_nvl = nvl;
248 	ev->ev_detector = detector;
249 	ev->ev_cb = cb;
250 	ev->ev_eid = eid;
251 
252 	mutex_enter(&zevent_lock);
253 	zfs_zevent_insert(ev);
254 	cv_broadcast(&zevent_cv);
255 	mutex_exit(&zevent_lock);
256 
257 out:
258 	if (error)
259 		cb(nvl, detector);
260 
261 	return (error);
262 }
263 
264 void
265 zfs_zevent_track_duplicate(void)
266 {
267 	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
268 }
269 
270 static int
271 zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
272 {
273 	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
274 	if (*ze == NULL)
275 		return (SET_ERROR(EBADF));
276 
277 	return (0);
278 }
279 
280 zfs_file_t *
281 zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
282 {
283 	zfs_file_t *fp = zfs_file_get(fd);
284 	if (fp == NULL)
285 		return (NULL);
286 
287 	int error = zfsdev_getminor(fp, minorp);
288 	if (error == 0)
289 		error = zfs_zevent_minor_to_state(*minorp, ze);
290 
291 	if (error) {
292 		zfs_zevent_fd_rele(fp);
293 		fp = NULL;
294 	}
295 
296 	return (fp);
297 }
298 
299 void
300 zfs_zevent_fd_rele(zfs_file_t *fp)
301 {
302 	zfs_file_put(fp);
303 }
304 
305 /*
306  * Get the next zevent in the stream and place a copy in 'event'.  This
307  * may fail with ENOMEM if the encoded nvlist size exceeds the passed
308  * 'event_size'.  In this case the stream pointer is not advanced and
309  * 'event_size' is set to the minimum required buffer size.
310  */
311 int
312 zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
313     uint64_t *dropped)
314 {
315 	zevent_t *ev;
316 	size_t size;
317 	int error = 0;
318 
319 	mutex_enter(&zevent_lock);
320 	if (ze->ze_zevent == NULL) {
321 		/* New stream start at the beginning/tail */
322 		ev = list_tail(&zevent_list);
323 		if (ev == NULL) {
324 			error = ENOENT;
325 			goto out;
326 		}
327 	} else {
328 		/*
329 		 * Existing stream continue with the next element and remove
330 		 * ourselves from the wait queue for the previous element
331 		 */
332 		ev = list_prev(&zevent_list, ze->ze_zevent);
333 		if (ev == NULL) {
334 			error = ENOENT;
335 			goto out;
336 		}
337 	}
338 
339 	VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
340 	if (size > *event_size) {
341 		*event_size = size;
342 		error = ENOMEM;
343 		goto out;
344 	}
345 
346 	if (ze->ze_zevent)
347 		list_remove(&ze->ze_zevent->ev_ze_list, ze);
348 
349 	ze->ze_zevent = ev;
350 	list_insert_head(&ev->ev_ze_list, ze);
351 	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
352 	*dropped = ze->ze_dropped;
353 
354 #ifdef _KERNEL
355 	/* Include events dropped due to rate limiting */
356 	*dropped += atomic_swap_64(&ratelimit_dropped, 0);
357 #endif
358 	ze->ze_dropped = 0;
359 out:
360 	mutex_exit(&zevent_lock);
361 
362 	return (error);
363 }
364 
365 /*
366  * Wait in an interruptible state for any new events.
367  */
368 int
369 zfs_zevent_wait(zfs_zevent_t *ze)
370 {
371 	int error = EAGAIN;
372 
373 	mutex_enter(&zevent_lock);
374 	zevent_waiters++;
375 
376 	while (error == EAGAIN) {
377 		if (zevent_flags & ZEVENT_SHUTDOWN) {
378 			error = SET_ERROR(ESHUTDOWN);
379 			break;
380 		}
381 
382 		if (cv_wait_sig(&zevent_cv, &zevent_lock) == 0) {
383 			error = SET_ERROR(EINTR);
384 			break;
385 		} else if (!list_is_empty(&zevent_list)) {
386 			error = 0;
387 			continue;
388 		} else {
389 			error = EAGAIN;
390 		}
391 	}
392 
393 	zevent_waiters--;
394 	mutex_exit(&zevent_lock);
395 
396 	return (error);
397 }
398 
399 /*
400  * The caller may seek to a specific EID by passing that EID.  If the EID
401  * is still available in the posted list of events the cursor is positioned
402  * there.  Otherwise ENOENT is returned and the cursor is not moved.
403  *
404  * There are two reserved EIDs which may be passed and will never fail.
405  * ZEVENT_SEEK_START positions the cursor at the start of the list, and
406  * ZEVENT_SEEK_END positions the cursor at the end of the list.
407  */
408 int
409 zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
410 {
411 	zevent_t *ev;
412 	int error = 0;
413 
414 	mutex_enter(&zevent_lock);
415 
416 	if (eid == ZEVENT_SEEK_START) {
417 		if (ze->ze_zevent)
418 			list_remove(&ze->ze_zevent->ev_ze_list, ze);
419 
420 		ze->ze_zevent = NULL;
421 		goto out;
422 	}
423 
424 	if (eid == ZEVENT_SEEK_END) {
425 		if (ze->ze_zevent)
426 			list_remove(&ze->ze_zevent->ev_ze_list, ze);
427 
428 		ev = list_head(&zevent_list);
429 		if (ev) {
430 			ze->ze_zevent = ev;
431 			list_insert_head(&ev->ev_ze_list, ze);
432 		} else {
433 			ze->ze_zevent = NULL;
434 		}
435 
436 		goto out;
437 	}
438 
439 	for (ev = list_tail(&zevent_list); ev != NULL;
440 	    ev = list_prev(&zevent_list, ev)) {
441 		if (ev->ev_eid == eid) {
442 			if (ze->ze_zevent)
443 				list_remove(&ze->ze_zevent->ev_ze_list, ze);
444 
445 			ze->ze_zevent = ev;
446 			list_insert_head(&ev->ev_ze_list, ze);
447 			break;
448 		}
449 	}
450 
451 	if (ev == NULL)
452 		error = ENOENT;
453 
454 out:
455 	mutex_exit(&zevent_lock);
456 
457 	return (error);
458 }
459 
460 void
461 zfs_zevent_init(zfs_zevent_t **zep)
462 {
463 	zfs_zevent_t *ze;
464 
465 	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
466 	list_link_init(&ze->ze_node);
467 }
468 
469 void
470 zfs_zevent_destroy(zfs_zevent_t *ze)
471 {
472 	mutex_enter(&zevent_lock);
473 	if (ze->ze_zevent)
474 		list_remove(&ze->ze_zevent->ev_ze_list, ze);
475 	mutex_exit(&zevent_lock);
476 
477 	kmem_free(ze, sizeof (zfs_zevent_t));
478 }
479 #endif /* _KERNEL */
480 
481 /*
482  * Wrappers for FM nvlist allocators
483  */
484 static void *
485 i_fm_alloc(nv_alloc_t *nva, size_t size)
486 {
487 	(void) nva;
488 	return (kmem_alloc(size, KM_SLEEP));
489 }
490 
491 static void
492 i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
493 {
494 	(void) nva;
495 	kmem_free(buf, size);
496 }
497 
498 static const nv_alloc_ops_t fm_mem_alloc_ops = {
499 	.nv_ao_init = NULL,
500 	.nv_ao_fini = NULL,
501 	.nv_ao_alloc = i_fm_alloc,
502 	.nv_ao_free = i_fm_free,
503 	.nv_ao_reset = NULL
504 };
505 
506 /*
507  * Create and initialize a new nv_alloc_t for a fixed buffer, buf.  A pointer
508  * to the newly allocated nv_alloc_t structure is returned upon success or NULL
509  * is returned to indicate that the nv_alloc structure could not be created.
510  */
511 nv_alloc_t *
512 fm_nva_xcreate(char *buf, size_t bufsz)
513 {
514 	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
515 
516 	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
517 		kmem_free(nvhdl, sizeof (nv_alloc_t));
518 		return (NULL);
519 	}
520 
521 	return (nvhdl);
522 }
523 
524 /*
525  * Destroy a previously allocated nv_alloc structure.  The fixed buffer
526  * associated with nva must be freed by the caller.
527  */
528 void
529 fm_nva_xdestroy(nv_alloc_t *nva)
530 {
531 	nv_alloc_fini(nva);
532 	kmem_free(nva, sizeof (nv_alloc_t));
533 }
534 
535 /*
536  * Create a new nv list.  A pointer to a new nv list structure is returned
537  * upon success or NULL is returned to indicate that the structure could
538  * not be created.  The newly created nv list is created and managed by the
539  * operations installed in nva.   If nva is NULL, the default FMA nva
540  * operations are installed and used.
541  *
542  * When called from the kernel and nva == NULL, this function must be called
543  * from passive kernel context with no locks held that can prevent a
544  * sleeping memory allocation from occurring.  Otherwise, this function may
545  * be called from other kernel contexts as long as a valid nva created via
546  * fm_nva_xcreate() is supplied.
547  */
548 nvlist_t *
549 fm_nvlist_create(nv_alloc_t *nva)
550 {
551 	int hdl_alloced = 0;
552 	nvlist_t *nvl;
553 	nv_alloc_t *nvhdl;
554 
555 	if (nva == NULL) {
556 		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
557 
558 		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
559 			kmem_free(nvhdl, sizeof (nv_alloc_t));
560 			return (NULL);
561 		}
562 		hdl_alloced = 1;
563 	} else {
564 		nvhdl = nva;
565 	}
566 
567 	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
568 		if (hdl_alloced) {
569 			nv_alloc_fini(nvhdl);
570 			kmem_free(nvhdl, sizeof (nv_alloc_t));
571 		}
572 		return (NULL);
573 	}
574 
575 	return (nvl);
576 }
577 
578 /*
579  * Destroy a previously allocated nvlist structure.  flag indicates whether
580  * or not the associated nva structure should be freed (FM_NVA_FREE) or
581  * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
582  * it to be re-used for future nvlist creation operations.
583  */
584 void
585 fm_nvlist_destroy(nvlist_t *nvl, int flag)
586 {
587 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);
588 
589 	nvlist_free(nvl);
590 
591 	if (nva != NULL) {
592 		if (flag == FM_NVA_FREE)
593 			fm_nva_xdestroy(nva);
594 	}
595 }
596 
597 int
598 i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
599 {
600 	int nelem, ret = 0;
601 	data_type_t type;
602 
603 	while (ret == 0 && name != NULL) {
604 		type = va_arg(ap, data_type_t);
605 		switch (type) {
606 		case DATA_TYPE_BYTE:
607 			ret = nvlist_add_byte(payload, name,
608 			    va_arg(ap, uint_t));
609 			break;
610 		case DATA_TYPE_BYTE_ARRAY:
611 			nelem = va_arg(ap, int);
612 			ret = nvlist_add_byte_array(payload, name,
613 			    va_arg(ap, uchar_t *), nelem);
614 			break;
615 		case DATA_TYPE_BOOLEAN_VALUE:
616 			ret = nvlist_add_boolean_value(payload, name,
617 			    va_arg(ap, boolean_t));
618 			break;
619 		case DATA_TYPE_BOOLEAN_ARRAY:
620 			nelem = va_arg(ap, int);
621 			ret = nvlist_add_boolean_array(payload, name,
622 			    va_arg(ap, boolean_t *), nelem);
623 			break;
624 		case DATA_TYPE_INT8:
625 			ret = nvlist_add_int8(payload, name,
626 			    va_arg(ap, int));
627 			break;
628 		case DATA_TYPE_INT8_ARRAY:
629 			nelem = va_arg(ap, int);
630 			ret = nvlist_add_int8_array(payload, name,
631 			    va_arg(ap, int8_t *), nelem);
632 			break;
633 		case DATA_TYPE_UINT8:
634 			ret = nvlist_add_uint8(payload, name,
635 			    va_arg(ap, uint_t));
636 			break;
637 		case DATA_TYPE_UINT8_ARRAY:
638 			nelem = va_arg(ap, int);
639 			ret = nvlist_add_uint8_array(payload, name,
640 			    va_arg(ap, uint8_t *), nelem);
641 			break;
642 		case DATA_TYPE_INT16:
643 			ret = nvlist_add_int16(payload, name,
644 			    va_arg(ap, int));
645 			break;
646 		case DATA_TYPE_INT16_ARRAY:
647 			nelem = va_arg(ap, int);
648 			ret = nvlist_add_int16_array(payload, name,
649 			    va_arg(ap, int16_t *), nelem);
650 			break;
651 		case DATA_TYPE_UINT16:
652 			ret = nvlist_add_uint16(payload, name,
653 			    va_arg(ap, uint_t));
654 			break;
655 		case DATA_TYPE_UINT16_ARRAY:
656 			nelem = va_arg(ap, int);
657 			ret = nvlist_add_uint16_array(payload, name,
658 			    va_arg(ap, uint16_t *), nelem);
659 			break;
660 		case DATA_TYPE_INT32:
661 			ret = nvlist_add_int32(payload, name,
662 			    va_arg(ap, int32_t));
663 			break;
664 		case DATA_TYPE_INT32_ARRAY:
665 			nelem = va_arg(ap, int);
666 			ret = nvlist_add_int32_array(payload, name,
667 			    va_arg(ap, int32_t *), nelem);
668 			break;
669 		case DATA_TYPE_UINT32:
670 			ret = nvlist_add_uint32(payload, name,
671 			    va_arg(ap, uint32_t));
672 			break;
673 		case DATA_TYPE_UINT32_ARRAY:
674 			nelem = va_arg(ap, int);
675 			ret = nvlist_add_uint32_array(payload, name,
676 			    va_arg(ap, uint32_t *), nelem);
677 			break;
678 		case DATA_TYPE_INT64:
679 			ret = nvlist_add_int64(payload, name,
680 			    va_arg(ap, int64_t));
681 			break;
682 		case DATA_TYPE_INT64_ARRAY:
683 			nelem = va_arg(ap, int);
684 			ret = nvlist_add_int64_array(payload, name,
685 			    va_arg(ap, int64_t *), nelem);
686 			break;
687 		case DATA_TYPE_UINT64:
688 			ret = nvlist_add_uint64(payload, name,
689 			    va_arg(ap, uint64_t));
690 			break;
691 		case DATA_TYPE_UINT64_ARRAY:
692 			nelem = va_arg(ap, int);
693 			ret = nvlist_add_uint64_array(payload, name,
694 			    va_arg(ap, uint64_t *), nelem);
695 			break;
696 		case DATA_TYPE_STRING:
697 			ret = nvlist_add_string(payload, name,
698 			    va_arg(ap, char *));
699 			break;
700 		case DATA_TYPE_STRING_ARRAY:
701 			nelem = va_arg(ap, int);
702 			ret = nvlist_add_string_array(payload, name,
703 			    va_arg(ap, const char **), nelem);
704 			break;
705 		case DATA_TYPE_NVLIST:
706 			ret = nvlist_add_nvlist(payload, name,
707 			    va_arg(ap, nvlist_t *));
708 			break;
709 		case DATA_TYPE_NVLIST_ARRAY:
710 			nelem = va_arg(ap, int);
711 			ret = nvlist_add_nvlist_array(payload, name,
712 			    va_arg(ap, const nvlist_t **), nelem);
713 			break;
714 		default:
715 			ret = EINVAL;
716 		}
717 
718 		name = va_arg(ap, char *);
719 	}
720 	return (ret);
721 }
722 
723 void
724 fm_payload_set(nvlist_t *payload, ...)
725 {
726 	int ret;
727 	const char *name;
728 	va_list ap;
729 
730 	va_start(ap, payload);
731 	name = va_arg(ap, char *);
732 	ret = i_fm_payload_set(payload, name, ap);
733 	va_end(ap);
734 
735 	if (ret)
736 		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
737 }
738 
739 /*
740  * Set-up and validate the members of an ereport event according to:
741  *
742  *	Member name		Type		Value
743  *	====================================================
744  *	class			string		ereport
745  *	version			uint8_t		0
746  *	ena			uint64_t	<ena>
747  *	detector		nvlist_t	<detector>
748  *	ereport-payload		nvlist_t	<var args>
749  *
750  * We don't actually add a 'version' member to the payload.  Really,
751  * the version quoted to us by our caller is that of the category 1
752  * "ereport" event class (and we require FM_EREPORT_VERS0) but
753  * the payload version of the actual leaf class event under construction
754  * may be something else.  Callers should supply a version in the varargs,
755  * or (better) we could take two version arguments - one for the
756  * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
757  * for the leaf class.
758  */
759 void
760 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
761     uint64_t ena, const nvlist_t *detector, ...)
762 {
763 	char ereport_class[FM_MAX_CLASS];
764 	const char *name;
765 	va_list ap;
766 	int ret;
767 
768 	if (version != FM_EREPORT_VERS0) {
769 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
770 		return;
771 	}
772 
773 	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
774 	    FM_EREPORT_CLASS, erpt_class);
775 	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
776 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
777 		return;
778 	}
779 
780 	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
781 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
782 	}
783 
784 	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
785 	    (nvlist_t *)detector) != 0) {
786 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
787 	}
788 
789 	va_start(ap, detector);
790 	name = va_arg(ap, const char *);
791 	ret = i_fm_payload_set(ereport, name, ap);
792 	va_end(ap);
793 
794 	if (ret)
795 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
796 }
797 
798 /*
799  * Set-up and validate the members of an hc fmri according to:
800  *
801  *	Member name		Type		Value
802  *	===================================================
803  *	version			uint8_t		0
804  *	auth			nvlist_t	<auth>
805  *	hc-name			string		<name>
806  *	hc-id			string		<id>
807  *
808  * Note that auth and hc-id are optional members.
809  */
810 
811 #define	HC_MAXPAIRS	20
812 #define	HC_MAXNAMELEN	50
813 
814 static int
815 fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
816 {
817 	if (version != FM_HC_SCHEME_VERSION) {
818 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
819 		return (0);
820 	}
821 
822 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
823 	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
824 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
825 		return (0);
826 	}
827 
828 	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
829 	    (nvlist_t *)auth) != 0) {
830 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
831 		return (0);
832 	}
833 
834 	return (1);
835 }
836 
837 void
838 fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
839     nvlist_t *snvl, int npairs, ...)
840 {
841 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
842 	nvlist_t *pairs[HC_MAXPAIRS];
843 	va_list ap;
844 	int i;
845 
846 	if (!fm_fmri_hc_set_common(fmri, version, auth))
847 		return;
848 
849 	npairs = MIN(npairs, HC_MAXPAIRS);
850 
851 	va_start(ap, npairs);
852 	for (i = 0; i < npairs; i++) {
853 		const char *name = va_arg(ap, const char *);
854 		uint32_t id = va_arg(ap, uint32_t);
855 		char idstr[11];
856 
857 		(void) snprintf(idstr, sizeof (idstr), "%u", id);
858 
859 		pairs[i] = fm_nvlist_create(nva);
860 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
861 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
862 			atomic_inc_64(
863 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
864 		}
865 	}
866 	va_end(ap);
867 
868 	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
869 	    (const nvlist_t **)pairs, npairs) != 0) {
870 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
871 	}
872 
873 	for (i = 0; i < npairs; i++)
874 		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
875 
876 	if (snvl != NULL) {
877 		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
878 			atomic_inc_64(
879 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
880 		}
881 	}
882 }
883 
884 void
885 fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
886     nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
887 {
888 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
889 	nvlist_t *pairs[HC_MAXPAIRS];
890 	nvlist_t **hcl;
891 	uint_t n;
892 	int i, j;
893 	va_list ap;
894 	const char *hcname, *hcid;
895 
896 	if (!fm_fmri_hc_set_common(fmri, version, auth))
897 		return;
898 
899 	/*
900 	 * copy the bboard nvpairs to the pairs array
901 	 */
902 	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
903 	    != 0) {
904 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
905 		return;
906 	}
907 
908 	for (i = 0; i < n; i++) {
909 		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
910 		    &hcname) != 0) {
911 			atomic_inc_64(
912 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
913 			return;
914 		}
915 		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
916 			atomic_inc_64(
917 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
918 			return;
919 		}
920 
921 		pairs[i] = fm_nvlist_create(nva);
922 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
923 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
924 			for (j = 0; j <= i; j++) {
925 				if (pairs[j] != NULL)
926 					fm_nvlist_destroy(pairs[j],
927 					    FM_NVA_RETAIN);
928 			}
929 			atomic_inc_64(
930 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
931 			return;
932 		}
933 	}
934 
935 	/*
936 	 * create the pairs from passed in pairs
937 	 */
938 	npairs = MIN(npairs, HC_MAXPAIRS);
939 
940 	va_start(ap, npairs);
941 	for (i = n; i < npairs + n; i++) {
942 		const char *name = va_arg(ap, const char *);
943 		uint32_t id = va_arg(ap, uint32_t);
944 		char idstr[11];
945 		(void) snprintf(idstr, sizeof (idstr), "%u", id);
946 		pairs[i] = fm_nvlist_create(nva);
947 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
948 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
949 			for (j = 0; j <= i; j++) {
950 				if (pairs[j] != NULL)
951 					fm_nvlist_destroy(pairs[j],
952 					    FM_NVA_RETAIN);
953 			}
954 			atomic_inc_64(
955 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
956 			va_end(ap);
957 			return;
958 		}
959 	}
960 	va_end(ap);
961 
962 	/*
963 	 * Create the fmri hc list
964 	 */
965 	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
966 	    (const nvlist_t **)pairs, npairs + n) != 0) {
967 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
968 		return;
969 	}
970 
971 	for (i = 0; i < npairs + n; i++) {
972 			fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
973 	}
974 
975 	if (snvl != NULL) {
976 		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
977 			atomic_inc_64(
978 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
979 			return;
980 		}
981 	}
982 }
983 
984 /*
985  * Set-up and validate the members of a dev fmri according to:
986  *
987  *	Member name		Type		Value
988  *	====================================================
989  *	version			uint8_t		0
990  *	auth			nvlist_t	<auth>
991  *	devpath			string		<devpath>
992  *	[devid]			string		<devid>
993  *	[target-port-l0id]	string		<target-port-lun0-id>
994  *
995  * Note that auth and devid are optional members.
996  */
997 void
998 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
999     const char *devpath, const char *devid, const char *tpl0)
1000 {
1001 	int err = 0;
1002 
1003 	if (version != DEV_SCHEME_VERSION0) {
1004 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1005 		return;
1006 	}
1007 
1008 	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
1009 	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);
1010 
1011 	if (auth != NULL) {
1012 		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
1013 		    (nvlist_t *)auth);
1014 	}
1015 
1016 	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);
1017 
1018 	if (devid != NULL)
1019 		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);
1020 
1021 	if (tpl0 != NULL)
1022 		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);
1023 
1024 	if (err)
1025 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1026 
1027 }
1028 
1029 /*
1030  * Set-up and validate the members of a cpu fmri according to:
1031  *
1032  *	Member name		Type		Value
1033  *	====================================================
1034  *	version			uint8_t		0
1035  *	auth			nvlist_t	<auth>
1036  *	cpuid			uint32_t	<cpu_id>
1037  *	cpumask			uint8_t		<cpu_mask>
1038  *	serial			uint64_t	<serial_id>
1039  *
1040  * Note that auth, cpumask, serial are optional members.
1041  *
1042  */
1043 void
1044 fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
1045     uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
1046 {
1047 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
1048 
1049 	if (version < CPU_SCHEME_VERSION1) {
1050 		atomic_inc_64(failedp);
1051 		return;
1052 	}
1053 
1054 	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
1055 		atomic_inc_64(failedp);
1056 		return;
1057 	}
1058 
1059 	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
1060 	    FM_FMRI_SCHEME_CPU) != 0) {
1061 		atomic_inc_64(failedp);
1062 		return;
1063 	}
1064 
1065 	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
1066 	    (nvlist_t *)auth) != 0)
1067 		atomic_inc_64(failedp);
1068 
1069 	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
1070 		atomic_inc_64(failedp);
1071 
1072 	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
1073 	    *cpu_maskp) != 0)
1074 		atomic_inc_64(failedp);
1075 
1076 	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
1077 	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
1078 			atomic_inc_64(failedp);
1079 }
1080 
1081 /*
1082  * Set-up and validate the members of a mem fmri according to:
1083  *
1084  *	Member name		Type		Value
1085  *	====================================================
1086  *	version			uint8_t		0
1087  *	auth			nvlist_t	<auth>		[optional]
1088  *	unum			string		<unum>
1089  *	serial			string		<serial>	[optional*]
1090  *	offset			uint64_t	<offset>	[optional]
1091  *
1092  *	* serial is required if offset is present
1093  */
1094 void
1095 fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
1096     const char *unum, const char *serial, uint64_t offset)
1097 {
1098 	if (version != MEM_SCHEME_VERSION0) {
1099 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1100 		return;
1101 	}
1102 
1103 	if (!serial && (offset != (uint64_t)-1)) {
1104 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1105 		return;
1106 	}
1107 
1108 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1109 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1110 		return;
1111 	}
1112 
1113 	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
1114 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1115 		return;
1116 	}
1117 
1118 	if (auth != NULL) {
1119 		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
1120 		    (nvlist_t *)auth) != 0) {
1121 			atomic_inc_64(
1122 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1123 		}
1124 	}
1125 
1126 	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
1127 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1128 	}
1129 
1130 	if (serial != NULL) {
1131 		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
1132 		    (const char **)&serial, 1) != 0) {
1133 			atomic_inc_64(
1134 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1135 		}
1136 		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
1137 		    FM_FMRI_MEM_OFFSET, offset) != 0) {
1138 			atomic_inc_64(
1139 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1140 		}
1141 	}
1142 }
1143 
1144 void
1145 fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
1146     uint64_t vdev_guid)
1147 {
1148 	if (version != ZFS_SCHEME_VERSION0) {
1149 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1150 		return;
1151 	}
1152 
1153 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1154 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1155 		return;
1156 	}
1157 
1158 	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
1159 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1160 		return;
1161 	}
1162 
1163 	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
1164 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1165 	}
1166 
1167 	if (vdev_guid != 0) {
1168 		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
1169 			atomic_inc_64(
1170 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1171 		}
1172 	}
1173 }
1174 
1175 uint64_t
1176 fm_ena_increment(uint64_t ena)
1177 {
1178 	uint64_t new_ena;
1179 
1180 	switch (ENA_FORMAT(ena)) {
1181 	case FM_ENA_FMT1:
1182 		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
1183 		break;
1184 	case FM_ENA_FMT2:
1185 		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
1186 		break;
1187 	default:
1188 		new_ena = 0;
1189 	}
1190 
1191 	return (new_ena);
1192 }
1193 
1194 uint64_t
1195 fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
1196 {
1197 	uint64_t ena = 0;
1198 
1199 	switch (format) {
1200 	case FM_ENA_FMT1:
1201 		if (timestamp) {
1202 			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1203 			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
1204 			    ENA_FMT1_CPUID_MASK) |
1205 			    ((timestamp << ENA_FMT1_TIME_SHFT) &
1206 			    ENA_FMT1_TIME_MASK));
1207 		} else {
1208 			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1209 			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
1210 			    ENA_FMT1_CPUID_MASK) |
1211 			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
1212 			    ENA_FMT1_TIME_MASK));
1213 		}
1214 		break;
1215 	case FM_ENA_FMT2:
1216 		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1217 		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
1218 		break;
1219 	default:
1220 		break;
1221 	}
1222 
1223 	return (ena);
1224 }
1225 
1226 uint64_t
1227 fm_ena_generate(uint64_t timestamp, uchar_t format)
1228 {
1229 	uint64_t ena;
1230 
1231 	kpreempt_disable();
1232 	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
1233 	kpreempt_enable();
1234 
1235 	return (ena);
1236 }
1237 
1238 uint64_t
1239 fm_ena_generation_get(uint64_t ena)
1240 {
1241 	uint64_t gen;
1242 
1243 	switch (ENA_FORMAT(ena)) {
1244 	case FM_ENA_FMT1:
1245 		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
1246 		break;
1247 	case FM_ENA_FMT2:
1248 		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
1249 		break;
1250 	default:
1251 		gen = 0;
1252 		break;
1253 	}
1254 
1255 	return (gen);
1256 }
1257 
1258 uchar_t
1259 fm_ena_format_get(uint64_t ena)
1260 {
1261 
1262 	return (ENA_FORMAT(ena));
1263 }
1264 
1265 uint64_t
1266 fm_ena_id_get(uint64_t ena)
1267 {
1268 	uint64_t id;
1269 
1270 	switch (ENA_FORMAT(ena)) {
1271 	case FM_ENA_FMT1:
1272 		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
1273 		break;
1274 	case FM_ENA_FMT2:
1275 		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
1276 		break;
1277 	default:
1278 		id = 0;
1279 	}
1280 
1281 	return (id);
1282 }
1283 
1284 uint64_t
1285 fm_ena_time_get(uint64_t ena)
1286 {
1287 	uint64_t time;
1288 
1289 	switch (ENA_FORMAT(ena)) {
1290 	case FM_ENA_FMT1:
1291 		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
1292 		break;
1293 	case FM_ENA_FMT2:
1294 		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
1295 		break;
1296 	default:
1297 		time = 0;
1298 	}
1299 
1300 	return (time);
1301 }
1302 
1303 #ifdef _KERNEL
1304 /*
1305  * Helper function to increment ereport dropped count.  Used by the event
1306  * rate limiting code to give feedback to the user about how many events were
1307  * rate limited by including them in the 'dropped' count.
1308  */
1309 void
1310 fm_erpt_dropped_increment(void)
1311 {
1312 	atomic_inc_64(&ratelimit_dropped);
1313 }
1314 
1315 void
1316 fm_init(void)
1317 {
1318 	zevent_len_cur = 0;
1319 	zevent_flags = 0;
1320 
1321 	/* Initialize zevent allocation and generation kstats */
1322 	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
1323 	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
1324 	    KSTAT_FLAG_VIRTUAL);
1325 
1326 	if (fm_ksp != NULL) {
1327 		fm_ksp->ks_data = &erpt_kstat_data;
1328 		kstat_install(fm_ksp);
1329 	} else {
1330 		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
1331 	}
1332 
1333 	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
1334 	list_create(&zevent_list, sizeof (zevent_t),
1335 	    offsetof(zevent_t, ev_node));
1336 	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
1337 
1338 	zfs_ereport_init();
1339 }
1340 
1341 void
1342 fm_fini(void)
1343 {
1344 	uint_t count;
1345 
1346 	zfs_ereport_fini();
1347 
1348 	zfs_zevent_drain_all(&count);
1349 
1350 	mutex_enter(&zevent_lock);
1351 	cv_broadcast(&zevent_cv);
1352 
1353 	zevent_flags |= ZEVENT_SHUTDOWN;
1354 	while (zevent_waiters > 0) {
1355 		mutex_exit(&zevent_lock);
1356 		kpreempt(KPREEMPT_SYNC);
1357 		mutex_enter(&zevent_lock);
1358 	}
1359 	mutex_exit(&zevent_lock);
1360 
1361 	cv_destroy(&zevent_cv);
1362 	list_destroy(&zevent_list);
1363 	mutex_destroy(&zevent_lock);
1364 
1365 	if (fm_ksp != NULL) {
1366 		kstat_delete(fm_ksp);
1367 		fm_ksp = NULL;
1368 	}
1369 }
1370 #endif /* _KERNEL */
1371 
/*
 * Register the zevent "len_max" tunable (UINT, ZMOD_RW) with the module
 * parameter framework.  Presumably this maps to the zfs_zevent_len_max
 * variable (prefix + name) -- confirm against the ZFS_MODULE_PARAM
 * definition.
 */
ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, UINT, ZMOD_RW,
	"Max event queue length");
1374