xref: /freebsd/sys/contrib/openzfs/module/zfs/fm.c (revision 16038816)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Fault Management Architecture (FMA) Resource and Protocol Support
27  *
28  * The routines contained herein provide services to support kernel subsystems
29  * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
30  *
31  * Name-Value Pair Lists
32  *
33  * The embodiment of an FMA protocol element (event, fmri or authority) is a
34  * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
35  * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
36  * to create an nvpair list using custom allocators.  Callers may choose to
37  * allocate either from the kernel memory allocator, or from a preallocated
38  * buffer, useful in constrained contexts like high-level interrupt routines.
39  *
40  * Protocol Event and FMRI Construction
41  *
42  * Convenience routines are provided to construct nvlist events according to
43  * the FMA Event Protocol and Naming Schema specification for ereports and
44  * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
45  *
46  * ENA Manipulation
47  *
48  * Routines to generate ENA formats 0, 1 and 2 are available as well as
49  * routines to increment formats 1 and 2.  Individual fields within the
50  * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
51  * fm_ena_format_get() and fm_ena_gen_get().
52  */
53 
54 #include <sys/types.h>
55 #include <sys/time.h>
56 #include <sys/list.h>
57 #include <sys/nvpair.h>
58 #include <sys/cmn_err.h>
59 #include <sys/sysmacros.h>
60 #include <sys/sunddi.h>
61 #include <sys/systeminfo.h>
62 #include <sys/fm/util.h>
63 #include <sys/fm/protocol.h>
64 #include <sys/kstat.h>
65 #include <sys/zfs_context.h>
66 #ifdef _KERNEL
67 #include <sys/atomic.h>
68 #include <sys/condvar.h>
69 #include <sys/zfs_ioctl.h>
70 
71 int zfs_zevent_len_max = 512;
72 
73 static int zevent_len_cur = 0;
74 static int zevent_waiters = 0;
75 static int zevent_flags = 0;
76 
77 /* Num events rate limited since the last time zfs_zevent_next() was called */
78 static uint64_t ratelimit_dropped = 0;
79 
80 /*
81  * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
82  * posted.  The posted EIDs are monotonically increasing but not persistent.
83  * They will be reset to the initial value (1) each time the kernel module is
84  * loaded.
85  */
86 static uint64_t zevent_eid = 0;
87 
88 static kmutex_t zevent_lock;
89 static list_t zevent_list;
90 static kcondvar_t zevent_cv;
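/*
 * zevent_list holds posted events with the newest at the head.  The
 * zevent_lock mutex protects the list, the counters above, and the
 * per-consumer zfs_zevent_t state; zevent_cv is broadcast whenever a
 * new event is inserted (and again at shutdown).
 */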
91 #endif /* _KERNEL */
92 
93 
94 /*
95  * Common fault management kstats to record event generation failures
96  */
97 
98 struct erpt_kstat {
99 	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
100 	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
101 	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
102 	kstat_named_t	payload_set_failed;	/* num payload set failures */
103 	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
104 };
105 
106 static struct erpt_kstat erpt_kstat_data = {
107 	{ "erpt-dropped", KSTAT_DATA_UINT64 },
108 	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
109 	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
110 	{ "payload-set-failed", KSTAT_DATA_UINT64 },
111 	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
112 };
113 
114 kstat_t *fm_ksp;
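/*
 * These counters are exported through the "fm" kstat created in
 * fm_init(); on Linux they typically appear under
 * /proc/spl/kstat/zfs/fm, and on FreeBSD under the kstat.zfs.misc.fm
 * sysctl tree.
 */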
115 
116 #ifdef _KERNEL
117 
118 static zevent_t *
119 zfs_zevent_alloc(void)
120 {
121 	zevent_t *ev;
122 
123 	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);
124 
125 	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
126 	    offsetof(zfs_zevent_t, ze_node));
127 	list_link_init(&ev->ev_node);
128 
129 	return (ev);
130 }
131 
132 static void
133 zfs_zevent_free(zevent_t *ev)
134 {
135 	/* Run provided cleanup callback */
136 	ev->ev_cb(ev->ev_nvl, ev->ev_detector);
137 
138 	list_destroy(&ev->ev_ze_list);
139 	kmem_free(ev, sizeof (zevent_t));
140 }
141 
142 static void
143 zfs_zevent_drain(zevent_t *ev)
144 {
145 	zfs_zevent_t *ze;
146 
147 	ASSERT(MUTEX_HELD(&zevent_lock));
148 	list_remove(&zevent_list, ev);
149 
150 	/* Remove references to this event in all private file data */
151 	while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
152 		list_remove(&ev->ev_ze_list, ze);
153 		ze->ze_zevent = NULL;
154 		ze->ze_dropped++;
155 	}
156 
157 	zfs_zevent_free(ev);
158 }
159 
160 void
161 zfs_zevent_drain_all(int *count)
162 {
163 	zevent_t *ev;
164 
165 	mutex_enter(&zevent_lock);
166 	while ((ev = list_head(&zevent_list)) != NULL)
167 		zfs_zevent_drain(ev);
168 
169 	*count = zevent_len_cur;
170 	zevent_len_cur = 0;
171 	mutex_exit(&zevent_lock);
172 }
173 
174 /*
175  * New zevents are inserted at the head.  If the maximum queue
176  * length is exceeded, a zevent will be drained from the tail.
177  * As part of this any user space processes which currently have
178  * a reference to this zevent_t in their private data will have
179  * this reference set to NULL.
180  */
181 static void
182 zfs_zevent_insert(zevent_t *ev)
183 {
184 	ASSERT(MUTEX_HELD(&zevent_lock));
185 	list_insert_head(&zevent_list, ev);
186 
187 	if (zevent_len_cur >= zfs_zevent_len_max)
188 		zfs_zevent_drain(list_tail(&zevent_list));
189 	else
190 		zevent_len_cur++;
191 }
192 
193 /*
194  * Post a zevent. The cb will be called when nvl and detector are no longer
195  * needed, i.e.:
196  * - An error happened and a zevent can't be posted. In this case, cb is called
197  *   before zfs_zevent_post() returns.
198  * - The event is being drained and freed.
199  */
200 int
201 zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
202 {
203 	inode_timespec_t tv;
204 	int64_t tv_array[2];
205 	uint64_t eid;
206 	size_t nvl_size = 0;
207 	zevent_t *ev;
208 	int error;
209 
210 	ASSERT(cb != NULL);
211 
212 	gethrestime(&tv);
213 	tv_array[0] = tv.tv_sec;
214 	tv_array[1] = tv.tv_nsec;
215 
216 	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
217 	if (error) {
218 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
219 		goto out;
220 	}
221 
222 	eid = atomic_inc_64_nv(&zevent_eid);
223 	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
224 	if (error) {
225 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
226 		goto out;
227 	}
228 
229 	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
230 	if (error) {
231 		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
232 		goto out;
233 	}
234 
235 	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
236 		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
237 		error = EOVERFLOW;
238 		goto out;
239 	}
240 
241 	ev = zfs_zevent_alloc();
242 	if (ev == NULL) {
243 		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
244 		error = ENOMEM;
245 		goto out;
246 	}
247 
248 	ev->ev_nvl = nvl;
249 	ev->ev_detector = detector;
250 	ev->ev_cb = cb;
251 	ev->ev_eid = eid;
252 
253 	mutex_enter(&zevent_lock);
254 	zfs_zevent_insert(ev);
255 	cv_broadcast(&zevent_cv);
256 	mutex_exit(&zevent_lock);
257 
258 out:
259 	if (error)
260 		cb(nvl, detector);
261 
262 	return (error);
263 }
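/*
 * A minimal usage sketch (hypothetical caller, not a specific call site
 * in this tree): the callback owns nvl and detector and frees them,
 * either immediately on a failed post or later when the event is
 * drained.
 *
 *	static void
 *	my_zevent_cb(nvlist_t *nvl, nvlist_t *detector)
 *	{
 *		fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *		if (detector != NULL)
 *			fm_nvlist_destroy(detector, FM_NVA_FREE);
 *	}
 *
 *	nvlist_t *nvl = fm_nvlist_create(NULL);
 *	... add class/payload members to nvl ...
 *	(void) zfs_zevent_post(nvl, NULL, my_zevent_cb);
 */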
264 
265 void
266 zfs_zevent_track_duplicate(void)
267 {
268 	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
269 }
270 
271 static int
272 zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
273 {
274 	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
275 	if (*ze == NULL)
276 		return (SET_ERROR(EBADF));
277 
278 	return (0);
279 }
280 
281 int
282 zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
283 {
284 	int error;
285 
286 	error = zfsdev_getminor(fd, minorp);
287 	if (error == 0)
288 		error = zfs_zevent_minor_to_state(*minorp, ze);
289 
290 	if (error)
291 		zfs_zevent_fd_rele(fd);
292 
293 	return (error);
294 }
295 
296 void
297 zfs_zevent_fd_rele(int fd)
298 {
299 	zfs_file_put(fd);
300 }
301 
302 /*
303  * Get the next zevent in the stream and place a copy in 'event'.  This
304  * may fail with ENOMEM if the encoded nvlist size exceeds the passed
305  * 'event_size'.  In this case the stream pointer is not advanced and
306  * 'event_size' is set to the minimum required buffer size.
307  */
308 int
309 zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
310     uint64_t *dropped)
311 {
312 	zevent_t *ev;
313 	size_t size;
314 	int error = 0;
315 
316 	mutex_enter(&zevent_lock);
317 	if (ze->ze_zevent == NULL) {
318 		/* New stream, start at the beginning (the list tail) */
319 		ev = list_tail(&zevent_list);
320 		if (ev == NULL) {
321 			error = ENOENT;
322 			goto out;
323 		}
324 	} else {
325 		/*
326 		 * Existing stream, continue with the next element and remove
327 		 * ourselves from the wait queue for the previous element
328 		 */
329 		ev = list_prev(&zevent_list, ze->ze_zevent);
330 		if (ev == NULL) {
331 			error = ENOENT;
332 			goto out;
333 		}
334 	}
335 
336 	VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
337 	if (size > *event_size) {
338 		*event_size = size;
339 		error = ENOMEM;
340 		goto out;
341 	}
342 
343 	if (ze->ze_zevent)
344 		list_remove(&ze->ze_zevent->ev_ze_list, ze);
345 
346 	ze->ze_zevent = ev;
347 	list_insert_head(&ev->ev_ze_list, ze);
348 	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
349 	*dropped = ze->ze_dropped;
350 
351 #ifdef _KERNEL
352 	/* Include events dropped due to rate limiting */
353 	*dropped += atomic_swap_64(&ratelimit_dropped, 0);
354 #endif
355 	ze->ze_dropped = 0;
356 out:
357 	mutex_exit(&zevent_lock);
358 
359 	return (error);
360 }
361 
362 /*
363  * Wait in an interruptible state for any new events.
364  */
365 int
366 zfs_zevent_wait(zfs_zevent_t *ze)
367 {
368 	int error = EAGAIN;
369 
370 	mutex_enter(&zevent_lock);
371 	zevent_waiters++;
372 
373 	while (error == EAGAIN) {
374 		if (zevent_flags & ZEVENT_SHUTDOWN) {
375 			error = SET_ERROR(ESHUTDOWN);
376 			break;
377 		}
378 
379 		error = cv_wait_sig(&zevent_cv, &zevent_lock);
380 		if (signal_pending(current)) {
381 			error = SET_ERROR(EINTR);
382 			break;
383 		} else if (!list_is_empty(&zevent_list)) {
384 			error = 0;
385 			continue;
386 		} else {
387 			error = EAGAIN;
388 		}
389 	}
390 
391 	zevent_waiters--;
392 	mutex_exit(&zevent_lock);
393 
394 	return (error);
395 }
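/*
 * Together, zfs_zevent_next() and zfs_zevent_wait() support the usual
 * consumer loop (a sketch, not a specific caller): drain events until
 * zfs_zevent_next() returns ENOENT, enlarging the buffer and retrying
 * on ENOMEM, then block in zfs_zevent_wait() until more events arrive
 * or ESHUTDOWN/EINTR ends the loop.
 */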
396 
397 /*
398  * The caller may seek to a specific EID by passing that EID.  If the EID
399  * is still available in the posted list of events the cursor is positioned
400  * there.  Otherwise ENOENT is returned and the cursor is not moved.
401  *
402  * There are two reserved EIDs which may be passed and will never fail.
403  * ZEVENT_SEEK_START positions the cursor at the start of the list, and
404  * ZEVENT_SEEK_END positions the cursor at the end of the list.
405  */
406 int
407 zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
408 {
409 	zevent_t *ev;
410 	int error = 0;
411 
412 	mutex_enter(&zevent_lock);
413 
414 	if (eid == ZEVENT_SEEK_START) {
415 		if (ze->ze_zevent)
416 			list_remove(&ze->ze_zevent->ev_ze_list, ze);
417 
418 		ze->ze_zevent = NULL;
419 		goto out;
420 	}
421 
422 	if (eid == ZEVENT_SEEK_END) {
423 		if (ze->ze_zevent)
424 			list_remove(&ze->ze_zevent->ev_ze_list, ze);
425 
426 		ev = list_head(&zevent_list);
427 		if (ev) {
428 			ze->ze_zevent = ev;
429 			list_insert_head(&ev->ev_ze_list, ze);
430 		} else {
431 			ze->ze_zevent = NULL;
432 		}
433 
434 		goto out;
435 	}
436 
437 	for (ev = list_tail(&zevent_list); ev != NULL;
438 	    ev = list_prev(&zevent_list, ev)) {
439 		if (ev->ev_eid == eid) {
440 			if (ze->ze_zevent)
441 				list_remove(&ze->ze_zevent->ev_ze_list, ze);
442 
443 			ze->ze_zevent = ev;
444 			list_insert_head(&ev->ev_ze_list, ze);
445 			break;
446 		}
447 	}
448 
449 	if (ev == NULL)
450 		error = ENOENT;
451 
452 out:
453 	mutex_exit(&zevent_lock);
454 
455 	return (error);
456 }
457 
458 void
459 zfs_zevent_init(zfs_zevent_t **zep)
460 {
461 	zfs_zevent_t *ze;
462 
463 	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
464 	list_link_init(&ze->ze_node);
465 }
466 
467 void
468 zfs_zevent_destroy(zfs_zevent_t *ze)
469 {
470 	mutex_enter(&zevent_lock);
471 	if (ze->ze_zevent)
472 		list_remove(&ze->ze_zevent->ev_ze_list, ze);
473 	mutex_exit(&zevent_lock);
474 
475 	kmem_free(ze, sizeof (zfs_zevent_t));
476 }
477 #endif /* _KERNEL */
478 
479 /*
480  * Wrappers for FM nvlist allocators
481  */
482 /* ARGSUSED */
483 static void *
484 i_fm_alloc(nv_alloc_t *nva, size_t size)
485 {
486 	return (kmem_zalloc(size, KM_SLEEP));
487 }
488 
489 /* ARGSUSED */
490 static void
491 i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
492 {
493 	kmem_free(buf, size);
494 }
495 
496 const nv_alloc_ops_t fm_mem_alloc_ops = {
497 	.nv_ao_init = NULL,
498 	.nv_ao_fini = NULL,
499 	.nv_ao_alloc = i_fm_alloc,
500 	.nv_ao_free = i_fm_free,
501 	.nv_ao_reset = NULL
502 };
503 
504 /*
505  * Create and initialize a new nv_alloc_t for a fixed buffer, buf.  A pointer
506  * to the newly allocated nv_alloc_t structure is returned upon success or NULL
507  * is returned to indicate that the nv_alloc structure could not be created.
508  */
509 nv_alloc_t *
510 fm_nva_xcreate(char *buf, size_t bufsz)
511 {
512 	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
513 
514 	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
515 		kmem_free(nvhdl, sizeof (nv_alloc_t));
516 		return (NULL);
517 	}
518 
519 	return (nvhdl);
520 }
521 
522 /*
523  * Destroy a previously allocated nv_alloc structure.  The fixed buffer
524  * associated with nva must be freed by the caller.
525  */
526 void
527 fm_nva_xdestroy(nv_alloc_t *nva)
528 {
529 	nv_alloc_fini(nva);
530 	kmem_free(nva, sizeof (nv_alloc_t));
531 }
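/*
 * Sketch of the fixed-buffer pattern (buffer name and size are
 * illustrative): the nva is built ahead of time from a context where
 * sleeping is allowed, and nvlists created against it then draw from
 * the caller-owned buffer instead of kmem.
 *
 *	static char ev_buf[ERPT_DATA_SZ];
 *	nv_alloc_t *nva = fm_nva_xcreate(ev_buf, sizeof (ev_buf));
 *	...
 *	fm_nva_xdestroy(nva);
 *
 * ev_buf itself remains caller-owned and must outlive the nva.
 */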
532 
533 /*
534  * Create a new nv list.  A pointer to a new nv list structure is returned
535  * upon success or NULL is returned to indicate that the structure could
536  * not be created.  The newly created nv list is managed by the
537  * operations installed in nva.  If nva is NULL, the default FMA nva
538  * operations are installed and used.
539  *
540  * When called from the kernel and nva == NULL, this function must be called
541  * from passive kernel context with no locks held that can prevent a
542  * sleeping memory allocation from occurring.  Otherwise, this function may
543  * be called from other kernel contexts as long as a valid nva created via
544  * fm_nva_xcreate() is supplied.
545  */
546 nvlist_t *
547 fm_nvlist_create(nv_alloc_t *nva)
548 {
549 	int hdl_alloced = 0;
550 	nvlist_t *nvl;
551 	nv_alloc_t *nvhdl;
552 
553 	if (nva == NULL) {
554 		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
555 
556 		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
557 			kmem_free(nvhdl, sizeof (nv_alloc_t));
558 			return (NULL);
559 		}
560 		hdl_alloced = 1;
561 	} else {
562 		nvhdl = nva;
563 	}
564 
565 	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
566 		if (hdl_alloced) {
567 			nv_alloc_fini(nvhdl);
568 			kmem_free(nvhdl, sizeof (nv_alloc_t));
569 		}
570 		return (NULL);
571 	}
572 
573 	return (nvl);
574 }
575 
576 /*
577  * Destroy a previously allocated nvlist structure.  flag indicates whether
578  * or not the associated nva structure should be freed (FM_NVA_FREE) or
579  * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
580  * it to be re-used for future nvlist creation operations.
581  */
582 void
583 fm_nvlist_destroy(nvlist_t *nvl, int flag)
584 {
585 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);
586 
587 	nvlist_free(nvl);
588 
589 	if (nva != NULL) {
590 		if (flag == FM_NVA_FREE)
591 			fm_nva_xdestroy(nva);
592 	}
593 }
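/*
 * Typical pairing (sketch): a one-shot list allocated with the default
 * ops is destroyed with FM_NVA_FREE, while a list built from a
 * preallocated nva is destroyed with FM_NVA_RETAIN so the same nva can
 * back the next event.
 *
 *	nvlist_t *nvl = fm_nvlist_create(nva);
 *	...
 *	fm_nvlist_destroy(nvl, (nva == NULL) ? FM_NVA_FREE : FM_NVA_RETAIN);
 */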
594 
595 int
596 i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
597 {
598 	int nelem, ret = 0;
599 	data_type_t type;
600 
601 	while (ret == 0 && name != NULL) {
602 		type = va_arg(ap, data_type_t);
603 		switch (type) {
604 		case DATA_TYPE_BYTE:
605 			ret = nvlist_add_byte(payload, name,
606 			    va_arg(ap, uint_t));
607 			break;
608 		case DATA_TYPE_BYTE_ARRAY:
609 			nelem = va_arg(ap, int);
610 			ret = nvlist_add_byte_array(payload, name,
611 			    va_arg(ap, uchar_t *), nelem);
612 			break;
613 		case DATA_TYPE_BOOLEAN_VALUE:
614 			ret = nvlist_add_boolean_value(payload, name,
615 			    va_arg(ap, boolean_t));
616 			break;
617 		case DATA_TYPE_BOOLEAN_ARRAY:
618 			nelem = va_arg(ap, int);
619 			ret = nvlist_add_boolean_array(payload, name,
620 			    va_arg(ap, boolean_t *), nelem);
621 			break;
622 		case DATA_TYPE_INT8:
623 			ret = nvlist_add_int8(payload, name,
624 			    va_arg(ap, int));
625 			break;
626 		case DATA_TYPE_INT8_ARRAY:
627 			nelem = va_arg(ap, int);
628 			ret = nvlist_add_int8_array(payload, name,
629 			    va_arg(ap, int8_t *), nelem);
630 			break;
631 		case DATA_TYPE_UINT8:
632 			ret = nvlist_add_uint8(payload, name,
633 			    va_arg(ap, uint_t));
634 			break;
635 		case DATA_TYPE_UINT8_ARRAY:
636 			nelem = va_arg(ap, int);
637 			ret = nvlist_add_uint8_array(payload, name,
638 			    va_arg(ap, uint8_t *), nelem);
639 			break;
640 		case DATA_TYPE_INT16:
641 			ret = nvlist_add_int16(payload, name,
642 			    va_arg(ap, int));
643 			break;
644 		case DATA_TYPE_INT16_ARRAY:
645 			nelem = va_arg(ap, int);
646 			ret = nvlist_add_int16_array(payload, name,
647 			    va_arg(ap, int16_t *), nelem);
648 			break;
649 		case DATA_TYPE_UINT16:
650 			ret = nvlist_add_uint16(payload, name,
651 			    va_arg(ap, uint_t));
652 			break;
653 		case DATA_TYPE_UINT16_ARRAY:
654 			nelem = va_arg(ap, int);
655 			ret = nvlist_add_uint16_array(payload, name,
656 			    va_arg(ap, uint16_t *), nelem);
657 			break;
658 		case DATA_TYPE_INT32:
659 			ret = nvlist_add_int32(payload, name,
660 			    va_arg(ap, int32_t));
661 			break;
662 		case DATA_TYPE_INT32_ARRAY:
663 			nelem = va_arg(ap, int);
664 			ret = nvlist_add_int32_array(payload, name,
665 			    va_arg(ap, int32_t *), nelem);
666 			break;
667 		case DATA_TYPE_UINT32:
668 			ret = nvlist_add_uint32(payload, name,
669 			    va_arg(ap, uint32_t));
670 			break;
671 		case DATA_TYPE_UINT32_ARRAY:
672 			nelem = va_arg(ap, int);
673 			ret = nvlist_add_uint32_array(payload, name,
674 			    va_arg(ap, uint32_t *), nelem);
675 			break;
676 		case DATA_TYPE_INT64:
677 			ret = nvlist_add_int64(payload, name,
678 			    va_arg(ap, int64_t));
679 			break;
680 		case DATA_TYPE_INT64_ARRAY:
681 			nelem = va_arg(ap, int);
682 			ret = nvlist_add_int64_array(payload, name,
683 			    va_arg(ap, int64_t *), nelem);
684 			break;
685 		case DATA_TYPE_UINT64:
686 			ret = nvlist_add_uint64(payload, name,
687 			    va_arg(ap, uint64_t));
688 			break;
689 		case DATA_TYPE_UINT64_ARRAY:
690 			nelem = va_arg(ap, int);
691 			ret = nvlist_add_uint64_array(payload, name,
692 			    va_arg(ap, uint64_t *), nelem);
693 			break;
694 		case DATA_TYPE_STRING:
695 			ret = nvlist_add_string(payload, name,
696 			    va_arg(ap, char *));
697 			break;
698 		case DATA_TYPE_STRING_ARRAY:
699 			nelem = va_arg(ap, int);
700 			ret = nvlist_add_string_array(payload, name,
701 			    va_arg(ap, char **), nelem);
702 			break;
703 		case DATA_TYPE_NVLIST:
704 			ret = nvlist_add_nvlist(payload, name,
705 			    va_arg(ap, nvlist_t *));
706 			break;
707 		case DATA_TYPE_NVLIST_ARRAY:
708 			nelem = va_arg(ap, int);
709 			ret = nvlist_add_nvlist_array(payload, name,
710 			    va_arg(ap, nvlist_t **), nelem);
711 			break;
712 		default:
713 			ret = EINVAL;
714 		}
715 
716 		name = va_arg(ap, char *);
717 	}
718 	return (ret);
719 }
720 
721 void
722 fm_payload_set(nvlist_t *payload, ...)
723 {
724 	int ret;
725 	const char *name;
726 	va_list ap;
727 
728 	va_start(ap, payload);
729 	name = va_arg(ap, char *);
730 	ret = i_fm_payload_set(payload, name, ap);
731 	va_end(ap);
732 
733 	if (ret)
734 		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
735 }
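/*
 * The varargs convention, as consumed by i_fm_payload_set() above: each
 * member is a name, a data_type_t tag and the value, with an element
 * count preceding the pointer for array types, and the list is
 * terminated by a NULL name.  For example (member names are
 * illustrative):
 *
 *	fm_payload_set(nvl,
 *	    "errno", DATA_TYPE_INT32, err,
 *	    "path", DATA_TYPE_STRING, path,
 *	    NULL);
 */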
736 
737 /*
738  * Set-up and validate the members of an ereport event according to:
739  *
740  *	Member name		Type		Value
741  *	====================================================
742  *	class			string		ereport
743  *	version			uint8_t		0
744  *	ena			uint64_t	<ena>
745  *	detector		nvlist_t	<detector>
746  *	ereport-payload		nvlist_t	<var args>
747  *
748  * We don't actually add a 'version' member to the payload.  Really,
749  * the version quoted to us by our caller is that of the category 1
750  * "ereport" event class (and we require FM_EREPORT_VERS0) but
751  * the payload version of the actual leaf class event under construction
752  * may be something else.  Callers should supply a version in the varargs,
753  * or (better) we could take two version arguments - one for the
754  * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
755  * for the leaf class.
756  */
757 void
758 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
759     uint64_t ena, const nvlist_t *detector, ...)
760 {
761 	char ereport_class[FM_MAX_CLASS];
762 	const char *name;
763 	va_list ap;
764 	int ret;
765 
766 	if (version != FM_EREPORT_VERS0) {
767 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
768 		return;
769 	}
770 
771 	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
772 	    FM_EREPORT_CLASS, erpt_class);
773 	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
774 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
775 		return;
776 	}
777 
778 	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
779 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
780 	}
781 
782 	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
783 	    (nvlist_t *)detector) != 0) {
784 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
785 	}
786 
787 	va_start(ap, detector);
788 	name = va_arg(ap, const char *);
789 	ret = i_fm_payload_set(ereport, name, ap);
790 	va_end(ap);
791 
792 	if (ret)
793 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
794 }
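/*
 * Sketch of a call (class and payload are illustrative, not a specific
 * call site):
 *
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "fs.zfs.checksum",
 *	    ena, detector,
 *	    "pool", DATA_TYPE_STRING, pool_name,
 *	    NULL);
 *
 * yields an event of class "ereport.fs.zfs.checksum" carrying the ena,
 * detector and payload members listed in the table above.
 */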
795 
796 /*
797  * Set-up and validate the members of an hc fmri according to:
798  *
799  *	Member name		Type		Value
800  *	===================================================
801  *	version			uint8_t		0
802  *	auth			nvlist_t	<auth>
803  *	hc-name			string		<name>
804  *	hc-id			string		<id>
805  *
806  * Note that auth and hc-id are optional members.
807  */
808 
809 #define	HC_MAXPAIRS	20
810 #define	HC_MAXNAMELEN	50
811 
812 static int
813 fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
814 {
815 	if (version != FM_HC_SCHEME_VERSION) {
816 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
817 		return (0);
818 	}
819 
820 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
821 	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
822 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
823 		return (0);
824 	}
825 
826 	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
827 	    (nvlist_t *)auth) != 0) {
828 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
829 		return (0);
830 	}
831 
832 	return (1);
833 }
834 
835 void
836 fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
837     nvlist_t *snvl, int npairs, ...)
838 {
839 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
840 	nvlist_t *pairs[HC_MAXPAIRS];
841 	va_list ap;
842 	int i;
843 
844 	if (!fm_fmri_hc_set_common(fmri, version, auth))
845 		return;
846 
847 	npairs = MIN(npairs, HC_MAXPAIRS);
848 
849 	va_start(ap, npairs);
850 	for (i = 0; i < npairs; i++) {
851 		const char *name = va_arg(ap, const char *);
852 		uint32_t id = va_arg(ap, uint32_t);
853 		char idstr[11];
854 
855 		(void) snprintf(idstr, sizeof (idstr), "%u", id);
856 
857 		pairs[i] = fm_nvlist_create(nva);
858 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
859 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
860 			atomic_inc_64(
861 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
862 		}
863 	}
864 	va_end(ap);
865 
866 	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
867 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
868 
869 	for (i = 0; i < npairs; i++)
870 		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
871 
872 	if (snvl != NULL) {
873 		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
874 			atomic_inc_64(
875 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
876 		}
877 	}
878 }
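/*
 * The varargs are npairs (name, id) pairs, e.g. (illustrative values):
 *
 *	fm_fmri_hc_set(fmri, FM_HC_SCHEME_VERSION, auth, NULL, 2,
 *	    "motherboard", 0,
 *	    "chip", 1);
 *
 * which produces an hc-list of { hc-name="motherboard", hc-id="0" },
 * { hc-name="chip", hc-id="1" }.
 */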
879 
880 void
881 fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
882     nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
883 {
884 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
885 	nvlist_t *pairs[HC_MAXPAIRS];
886 	nvlist_t **hcl;
887 	uint_t n;
888 	int i, j;
889 	va_list ap;
890 	char *hcname, *hcid;
891 
892 	if (!fm_fmri_hc_set_common(fmri, version, auth))
893 		return;
894 
895 	/*
896 	 * copy the bboard nvpairs to the pairs array
897 	 */
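	/*
	 * Note: pairs[] holds at most HC_MAXPAIRS entries, but only
	 * npairs is clamped below; the bboard hc-list length (n) plus
	 * npairs is assumed by this routine to fit within HC_MAXPAIRS.
	 */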
898 	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
899 	    != 0) {
900 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
901 		return;
902 	}
903 
904 	for (i = 0; i < n; i++) {
905 		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
906 		    &hcname) != 0) {
907 			atomic_inc_64(
908 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
909 			return;
910 		}
911 		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
912 			atomic_inc_64(
913 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
914 			return;
915 		}
916 
917 		pairs[i] = fm_nvlist_create(nva);
918 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
919 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
920 			for (j = 0; j <= i; j++) {
921 				if (pairs[j] != NULL)
922 					fm_nvlist_destroy(pairs[j],
923 					    FM_NVA_RETAIN);
924 			}
925 			atomic_inc_64(
926 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
927 			return;
928 		}
929 	}
930 
931 	/*
932 	 * create pairs from the passed-in (name, id) varargs
933 	 */
934 	npairs = MIN(npairs, HC_MAXPAIRS);
935 
936 	va_start(ap, npairs);
937 	for (i = n; i < npairs + n; i++) {
938 		const char *name = va_arg(ap, const char *);
939 		uint32_t id = va_arg(ap, uint32_t);
940 		char idstr[11];
941 		(void) snprintf(idstr, sizeof (idstr), "%u", id);
942 		pairs[i] = fm_nvlist_create(nva);
943 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
944 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
945 			for (j = 0; j <= i; j++) {
946 				if (pairs[j] != NULL)
947 					fm_nvlist_destroy(pairs[j],
948 					    FM_NVA_RETAIN);
949 			}
950 			atomic_inc_64(
951 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
952 			return;
953 		}
954 	}
955 	va_end(ap);
956 
957 	/*
958 	 * Create the fmri hc list
959 	 */
960 	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
961 	    npairs + n) != 0) {
962 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
963 		return;
964 	}
965 
966 	for (i = 0; i < npairs + n; i++) {
967 		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
968 	}
969 
970 	if (snvl != NULL) {
971 		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
972 			atomic_inc_64(
973 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
974 			return;
975 		}
976 	}
977 }
978 
979 /*
980  * Set-up and validate the members of a dev fmri according to:
981  *
982  *	Member name		Type		Value
983  *	====================================================
984  *	version			uint8_t		0
985  *	auth			nvlist_t	<auth>
986  *	devpath			string		<devpath>
987  *	[devid]			string		<devid>
988  *	[target-port-l0id]	string		<target-port-lun0-id>
989  *
990  * Note that auth, devid and target-port-l0id are optional members.
991  */
992 void
993 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
994     const char *devpath, const char *devid, const char *tpl0)
995 {
996 	int err = 0;
997 
998 	if (version != DEV_SCHEME_VERSION0) {
999 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1000 		return;
1001 	}
1002 
1003 	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
1004 	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);
1005 
1006 	if (auth != NULL) {
1007 		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
1008 		    (nvlist_t *)auth);
1009 	}
1010 
1011 	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);
1012 
1013 	if (devid != NULL)
1014 		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);
1015 
1016 	if (tpl0 != NULL)
1017 		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);
1018 
1019 	if (err)
1020 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1021 
1022 }
1023 
1024 /*
1025  * Set-up and validate the members of a cpu fmri according to:
1026  *
1027  *	Member name		Type		Value
1028  *	====================================================
1029  *	version			uint8_t		0
1030  *	auth			nvlist_t	<auth>
1031  *	cpuid			uint32_t	<cpu_id>
1032  *	cpumask			uint8_t		<cpu_mask>
1033  *	serial			uint64_t	<serial_id>
1034  *
1035  * Note that auth, cpumask, and serial are optional members.
1036  *
1037  */
1038 void
1039 fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
1040     uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
1041 {
1042 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
1043 
1044 	if (version < CPU_SCHEME_VERSION1) {
1045 		atomic_inc_64(failedp);
1046 		return;
1047 	}
1048 
1049 	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
1050 		atomic_inc_64(failedp);
1051 		return;
1052 	}
1053 
1054 	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
1055 	    FM_FMRI_SCHEME_CPU) != 0) {
1056 		atomic_inc_64(failedp);
1057 		return;
1058 	}
1059 
1060 	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
1061 	    (nvlist_t *)auth) != 0)
1062 		atomic_inc_64(failedp);
1063 
1064 	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
1065 		atomic_inc_64(failedp);
1066 
1067 	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
1068 	    *cpu_maskp) != 0)
1069 		atomic_inc_64(failedp);
1070 
1071 	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
1072 	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
1073 		atomic_inc_64(failedp);
1074 }
1075 
1076 /*
1077  * Set-up and validate the members of a mem fmri according to:
1078  *
1079  *	Member name		Type		Value
1080  *	====================================================
1081  *	version			uint8_t		0
1082  *	auth			nvlist_t	<auth>		[optional]
1083  *	unum			string		<unum>
1084  *	serial			string		<serial>	[optional*]
1085  *	offset			uint64_t	<offset>	[optional]
1086  *
1087  *	* serial is required if offset is present
1088  */
1089 void
1090 fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
1091     const char *unum, const char *serial, uint64_t offset)
1092 {
1093 	if (version != MEM_SCHEME_VERSION0) {
1094 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1095 		return;
1096 	}
1097 
1098 	if (!serial && (offset != (uint64_t)-1)) {
1099 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1100 		return;
1101 	}
1102 
1103 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1104 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1105 		return;
1106 	}
1107 
1108 	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
1109 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1110 		return;
1111 	}
1112 
1113 	if (auth != NULL) {
1114 		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
1115 		    (nvlist_t *)auth) != 0) {
1116 			atomic_inc_64(
1117 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1118 		}
1119 	}
1120 
1121 	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
1122 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1123 	}
1124 
1125 	if (serial != NULL) {
1126 		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
1127 		    (char **)&serial, 1) != 0) {
1128 			atomic_inc_64(
1129 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1130 		}
1131 		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
1132 		    FM_FMRI_MEM_OFFSET, offset) != 0) {
1133 			atomic_inc_64(
1134 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1135 		}
1136 	}
1137 }
1138 
1139 void
1140 fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
1141     uint64_t vdev_guid)
1142 {
1143 	if (version != ZFS_SCHEME_VERSION0) {
1144 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1145 		return;
1146 	}
1147 
1148 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1149 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1150 		return;
1151 	}
1152 
1153 	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
1154 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1155 		return;
1156 	}
1157 
1158 	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
1159 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1160 	}
1161 
1162 	if (vdev_guid != 0) {
1163 		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
1164 			atomic_inc_64(
1165 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1166 		}
1167 	}
1168 }
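/*
 * A zfs-scheme fmri built this way is typically used as the detector
 * nvlist passed to fm_ereport_set()/zfs_zevent_post() for ZFS ereports;
 * vdev_guid may be 0 when the fault is at pool rather than vdev scope.
 */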
1169 
1170 uint64_t
1171 fm_ena_increment(uint64_t ena)
1172 {
1173 	uint64_t new_ena;
1174 
1175 	switch (ENA_FORMAT(ena)) {
1176 	case FM_ENA_FMT1:
1177 		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
1178 		break;
1179 	case FM_ENA_FMT2:
1180 		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
1181 		break;
1182 	default:
1183 		new_ena = 0;
1184 	}
1185 
1186 	return (new_ena);
1187 }
1188 
1189 uint64_t
1190 fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
1191 {
1192 	uint64_t ena = 0;
1193 
1194 	switch (format) {
1195 	case FM_ENA_FMT1:
1196 		if (timestamp) {
1197 			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1198 			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
1199 			    ENA_FMT1_CPUID_MASK) |
1200 			    ((timestamp << ENA_FMT1_TIME_SHFT) &
1201 			    ENA_FMT1_TIME_MASK));
1202 		} else {
1203 			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1204 			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
1205 			    ENA_FMT1_CPUID_MASK) |
1206 			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
1207 			    ENA_FMT1_TIME_MASK));
1208 		}
1209 		break;
1210 	case FM_ENA_FMT2:
1211 		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1212 		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
1213 		break;
1214 	default:
1215 		break;
1216 	}
1217 
1218 	return (ena);
1219 }
1220 
1221 uint64_t
1222 fm_ena_generate(uint64_t timestamp, uchar_t format)
1223 {
1224 	uint64_t ena;
1225 
1226 	kpreempt_disable();
1227 	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
1228 	kpreempt_enable();
1229 
1230 	return (ena);
1231 }
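/*
 * Sketch of typical ENA usage (illustrative): passing a timestamp of 0
 * with FM_ENA_FMT1 makes fm_ena_generate_cpu() substitute the current
 * gethrtime() value.
 *
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *	...
 *	uint64_t t = fm_ena_time_get(ena);
 *	uchar_t fmt = fm_ena_format_get(ena);
 */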
1232 
1233 uint64_t
1234 fm_ena_generation_get(uint64_t ena)
1235 {
1236 	uint64_t gen;
1237 
1238 	switch (ENA_FORMAT(ena)) {
1239 	case FM_ENA_FMT1:
1240 		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
1241 		break;
1242 	case FM_ENA_FMT2:
1243 		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
1244 		break;
1245 	default:
1246 		gen = 0;
1247 		break;
1248 	}
1249 
1250 	return (gen);
1251 }
1252 
1253 uchar_t
1254 fm_ena_format_get(uint64_t ena)
1255 {
1256 
1257 	return (ENA_FORMAT(ena));
1258 }
1259 
1260 uint64_t
1261 fm_ena_id_get(uint64_t ena)
1262 {
1263 	uint64_t id;
1264 
1265 	switch (ENA_FORMAT(ena)) {
1266 	case FM_ENA_FMT1:
1267 		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
1268 		break;
1269 	case FM_ENA_FMT2:
1270 		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
1271 		break;
1272 	default:
1273 		id = 0;
1274 	}
1275 
1276 	return (id);
1277 }
1278 
1279 uint64_t
1280 fm_ena_time_get(uint64_t ena)
1281 {
1282 	uint64_t time;
1283 
1284 	switch (ENA_FORMAT(ena)) {
1285 	case FM_ENA_FMT1:
1286 		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
1287 		break;
1288 	case FM_ENA_FMT2:
1289 		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
1290 		break;
1291 	default:
1292 		time = 0;
1293 	}
1294 
1295 	return (time);
1296 }
1297 
1298 #ifdef _KERNEL
1299 /*
1300  * Helper function to increment ereport dropped count.  Used by the event
1301  * rate limiting code to give feedback to the user about how many events were
1302  * rate limited by including them in the 'dropped' count.
1303  */
1304 void
1305 fm_erpt_dropped_increment(void)
1306 {
1307 	atomic_inc_64(&ratelimit_dropped);
1308 }
1309 
1310 void
1311 fm_init(void)
1312 {
1313 	zevent_len_cur = 0;
1314 	zevent_flags = 0;
1315 
1316 	/* Initialize zevent allocation and generation kstats */
1317 	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
1318 	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
1319 	    KSTAT_FLAG_VIRTUAL);
1320 
1321 	if (fm_ksp != NULL) {
1322 		fm_ksp->ks_data = &erpt_kstat_data;
1323 		kstat_install(fm_ksp);
1324 	} else {
1325 		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
1326 	}
1327 
1328 	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
1329 	list_create(&zevent_list, sizeof (zevent_t),
1330 	    offsetof(zevent_t, ev_node));
1331 	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
1332 
1333 	zfs_ereport_init();
1334 }
1335 
1336 void
1337 fm_fini(void)
1338 {
1339 	int count;
1340 
1341 	zfs_ereport_fini();
1342 
1343 	zfs_zevent_drain_all(&count);
1344 
1345 	mutex_enter(&zevent_lock);
1346 	cv_broadcast(&zevent_cv);
1347 
1348 	zevent_flags |= ZEVENT_SHUTDOWN;
1349 	while (zevent_waiters > 0) {
1350 		mutex_exit(&zevent_lock);
1351 		schedule();
1352 		mutex_enter(&zevent_lock);
1353 	}
1354 	mutex_exit(&zevent_lock);
1355 
1356 	cv_destroy(&zevent_cv);
1357 	list_destroy(&zevent_list);
1358 	mutex_destroy(&zevent_lock);
1359 
1360 	if (fm_ksp != NULL) {
1361 		kstat_delete(fm_ksp);
1362 		fm_ksp = NULL;
1363 	}
1364 }
1365 #endif /* _KERNEL */
1366 
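/*
 * Expose zfs_zevent_len_max, the queue cap enforced by
 * zfs_zevent_insert(), as a read-write module parameter.
 */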
1367 ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, INT, ZMOD_RW,
1368 	"Max event queue length");
1369