/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Fault Management Architecture (FMA) Resource and Protocol Support
 *
 * The routines contained herein provide services to support kernel subsystems
 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
 *
 * Name-Value Pair Lists
 *
 * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
 * to create an nvpair list using custom allocators.  Callers may choose to
 * allocate either from the kernel memory allocator, or from a preallocated
 * buffer, which is useful in constrained contexts such as high-level
 * interrupt routines.
 *
 * Protocol Event and FMRI Construction
 *
 * Convenience routines are provided to construct nvlist events according to
 * the FMA Event Protocol and Naming Schema specification for ereports and
 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2.  Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_generation_get().
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/zfs_ioctl.h>

int zfs_zevent_len_max = 512;
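/*
 * Note: zfs_zevent_len_max is exposed as a module parameter (see the
 * ZFS_MODULE_PARAM() declaration at the bottom of this file).  On Linux
 * it can typically be tuned at runtime via
 * /sys/module/zfs/parameters/zfs_zevent_len_max.
 */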
static int zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;

/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted.  The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
 */
static uint64_t zevent_eid = 0;

static kmutex_t zevent_lock;
static list_t zevent_list;
static kcondvar_t zevent_cv;
#endif /* _KERNEL */


/*
 * Common fault management kstats to record event generation failures
 */

struct erpt_kstat {
	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
	kstat_named_t	payload_set_failed;	/* num payload set failures */
	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
};

static struct erpt_kstat erpt_kstat_data = {
	{ "erpt-dropped", KSTAT_DATA_UINT64 },
	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
	{ "payload-set-failed", KSTAT_DATA_UINT64 },
	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
};

kstat_t *fm_ksp;

#ifdef _KERNEL

static zevent_t *
zfs_zevent_alloc(void)
{
	zevent_t *ev;

	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);

	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
	    offsetof(zfs_zevent_t, ze_node));
	list_link_init(&ev->ev_node);

	return (ev);
}

static void
zfs_zevent_free(zevent_t *ev)
{
	/* Run provided cleanup callback */
	ev->ev_cb(ev->ev_nvl, ev->ev_detector);

	list_destroy(&ev->ev_ze_list);
	kmem_free(ev, sizeof (zevent_t));
}

static void
zfs_zevent_drain(zevent_t *ev)
{
	zfs_zevent_t *ze;

	ASSERT(MUTEX_HELD(&zevent_lock));
	list_remove(&zevent_list, ev);

	/* Remove references to this event in all private file data */
	while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
		list_remove(&ev->ev_ze_list, ze);
		ze->ze_zevent = NULL;
		ze->ze_dropped++;
	}

	zfs_zevent_free(ev);
}

void
zfs_zevent_drain_all(int *count)
{
	zevent_t *ev;

	mutex_enter(&zevent_lock);
	while ((ev = list_head(&zevent_list)) != NULL)
		zfs_zevent_drain(ev);

	*count = zevent_len_cur;
	zevent_len_cur = 0;
	mutex_exit(&zevent_lock);
}

/*
 * New zevents are inserted at the head.  If the maximum queue
 * length is exceeded a zevent will be drained from the tail.
 * As part of this any user space processes which currently have
 * a reference to this zevent_t in their private data will have
 * this reference set to NULL.
 */
static void
zfs_zevent_insert(zevent_t *ev)
{
	ASSERT(MUTEX_HELD(&zevent_lock));
	list_insert_head(&zevent_list, ev);

	if (zevent_len_cur >= zfs_zevent_len_max)
		zfs_zevent_drain(list_tail(&zevent_list));
	else
		zevent_len_cur++;
}
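/*
 * Note: when zfs_zevent_insert() drains the tail to enforce
 * zfs_zevent_len_max, zfs_zevent_drain() resets the cursor (ze_zevent)
 * of any stream still referencing that event and bumps its ze_dropped
 * count.  Consumers observe these drops through the 'dropped' out
 * parameter of zfs_zevent_next() below.
 */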
/*
 * Post a zevent.  The cb will be called when nvl and detector are no longer
 * needed, i.e.:
 * - An error happened and a zevent can't be posted.  In this case, cb is
 *   called before zfs_zevent_post() returns.
 * - The event is being drained and freed.
 */
int
zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
{
	inode_timespec_t tv;
	int64_t tv_array[2];
	uint64_t eid;
	size_t nvl_size = 0;
	zevent_t *ev;
	int error;

	ASSERT(cb != NULL);

	gethrestime(&tv);
	tv_array[0] = tv.tv_sec;
	tv_array[1] = tv.tv_nsec;

	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	eid = atomic_inc_64_nv(&zevent_eid);
	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		goto out;
	}

	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = EOVERFLOW;
		goto out;
	}

	ev = zfs_zevent_alloc();
	if (ev == NULL) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = ENOMEM;
		goto out;
	}

	ev->ev_nvl = nvl;
	ev->ev_detector = detector;
	ev->ev_cb = cb;
	ev->ev_eid = eid;

	mutex_enter(&zevent_lock);
	zfs_zevent_insert(ev);
	cv_broadcast(&zevent_cv);
	mutex_exit(&zevent_lock);

out:
	if (error)
		cb(nvl, detector);

	return (error);
}

void
zfs_zevent_track_duplicate(void)
{
	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
}

static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
	if (*ze == NULL)
		return (SET_ERROR(EBADF));

	return (0);
}

int
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
	int error;

	error = zfsdev_getminor(fd, minorp);
	if (error == 0)
		error = zfs_zevent_minor_to_state(*minorp, ze);

	if (error)
		zfs_zevent_fd_rele(fd);

	return (error);
}

void
zfs_zevent_fd_rele(int fd)
{
	zfs_file_put(fd);
}
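/*
 * Illustrative only -- a minimal sketch of the expected hold/rele pairing
 * from a caller (local variable names are hypothetical):
 *
 *	minor_t minor;
 *	zfs_zevent_t *ze;
 *
 *	error = zfs_zevent_fd_hold(fd, &minor, &ze);
 *	if (error == 0) {
 *		... use ze with zfs_zevent_next()/zfs_zevent_wait() ...
 *		zfs_zevent_fd_rele(fd);
 *	}
 *
 * Note that on error zfs_zevent_fd_hold() releases the fd reference
 * itself, so the caller only calls zfs_zevent_fd_rele() on success.
 */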
/*
 * Get the next zevent in the stream and place a copy in 'event'.  This
 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
 * 'event_size'.  In this case the stream pointer is not advanced and
 * 'event_size' is set to the minimum required buffer size.
 */
int
zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
    uint64_t *dropped)
{
	zevent_t *ev;
	size_t size;
	int error = 0;

	mutex_enter(&zevent_lock);
	if (ze->ze_zevent == NULL) {
		/* New stream, start at the beginning/tail */
		ev = list_tail(&zevent_list);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	} else {
		/*
		 * Existing stream, continue with the next element and
		 * remove ourselves from the wait queue for the previous
		 * element.
		 */
		ev = list_prev(&zevent_list, ze->ze_zevent);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	}

	VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
	if (size > *event_size) {
		*event_size = size;
		error = ENOMEM;
		goto out;
	}

	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);

	ze->ze_zevent = ev;
	list_insert_head(&ev->ev_ze_list, ze);
	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
	*dropped = ze->ze_dropped;

#ifdef _KERNEL
	/* Include events dropped due to rate limiting */
	*dropped += atomic_swap_64(&ratelimit_dropped, 0);
#endif
	ze->ze_dropped = 0;
out:
	mutex_exit(&zevent_lock);

	return (error);
}

/*
 * Wait in an interruptible state for any new events.
 */
int
zfs_zevent_wait(zfs_zevent_t *ze)
{
	int error = EAGAIN;

	mutex_enter(&zevent_lock);
	zevent_waiters++;

	while (error == EAGAIN) {
		if (zevent_flags & ZEVENT_SHUTDOWN) {
			error = SET_ERROR(ESHUTDOWN);
			break;
		}

		error = cv_wait_sig(&zevent_cv, &zevent_lock);
		if (signal_pending(current)) {
			error = SET_ERROR(EINTR);
			break;
		} else if (!list_is_empty(&zevent_list)) {
			error = 0;
			continue;
		} else {
			error = EAGAIN;
		}
	}

	zevent_waiters--;
	mutex_exit(&zevent_lock);

	return (error);
}
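/*
 * Illustrative only -- the consumer pattern suggested by the two
 * interfaces above (buffer sizing and cleanup elided):
 *
 *	uint64_t size, dropped;
 *	nvlist_t *event;
 *
 *	for (;;) {
 *		error = zfs_zevent_next(ze, &event, &size, &dropped);
 *		if (error == ENOENT) {
 *			if (zfs_zevent_wait(ze) != 0)
 *				break;	(EINTR or ESHUTDOWN)
 *			continue;
 *		}
 *		if (error == ENOMEM)
 *			continue;	('size' now holds the minimum)
 *		... consume 'event', then nvlist_free(event) ...
 *	}
 */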
/*
 * The caller may seek to a specific EID by passing that EID.  If the EID
 * is still available in the posted list of events the cursor is positioned
 * there.  Otherwise ENOENT is returned and the cursor is not moved.
 *
 * There are two reserved EIDs which may be passed and will never fail.
 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
 * ZEVENT_SEEK_END positions the cursor at the end of the list.
 */
int
zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
{
	zevent_t *ev;
	int error = 0;

	mutex_enter(&zevent_lock);

	if (eid == ZEVENT_SEEK_START) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ze->ze_zevent = NULL;
		goto out;
	}

	if (eid == ZEVENT_SEEK_END) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ev = list_head(&zevent_list);
		if (ev) {
			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
		} else {
			ze->ze_zevent = NULL;
		}

		goto out;
	}

	for (ev = list_tail(&zevent_list); ev != NULL;
	    ev = list_prev(&zevent_list, ev)) {
		if (ev->ev_eid == eid) {
			if (ze->ze_zevent)
				list_remove(&ze->ze_zevent->ev_ze_list, ze);

			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
			break;
		}
	}

	if (ev == NULL)
		error = ENOENT;

out:
	mutex_exit(&zevent_lock);

	return (error);
}

void
zfs_zevent_init(zfs_zevent_t **zep)
{
	zfs_zevent_t *ze;

	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
	list_link_init(&ze->ze_node);
}

void
zfs_zevent_destroy(zfs_zevent_t *ze)
{
	mutex_enter(&zevent_lock);
	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);
	mutex_exit(&zevent_lock);

	kmem_free(ze, sizeof (zfs_zevent_t));
}
#endif /* _KERNEL */

/*
 * Wrappers for FM nvlist allocators
 */
/* ARGSUSED */
static void *
i_fm_alloc(nv_alloc_t *nva, size_t size)
{
	return (kmem_zalloc(size, KM_SLEEP));
}

/* ARGSUSED */
static void
i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
{
	kmem_free(buf, size);
}

const nv_alloc_ops_t fm_mem_alloc_ops = {
	.nv_ao_init = NULL,
	.nv_ao_fini = NULL,
	.nv_ao_alloc = i_fm_alloc,
	.nv_ao_free = i_fm_free,
	.nv_ao_reset = NULL
};

/*
 * Create and initialize a new nv_alloc_t for a fixed buffer, buf.  A pointer
 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
 * is returned to indicate that the nv_alloc structure could not be created.
 */
nv_alloc_t *
fm_nva_xcreate(char *buf, size_t bufsz)
{
	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
		kmem_free(nvhdl, sizeof (nv_alloc_t));
		return (NULL);
	}

	return (nvhdl);
}

/*
 * Destroy a previously allocated nv_alloc structure.  The fixed buffer
 * associated with nva must be freed by the caller.
 */
void
fm_nva_xdestroy(nv_alloc_t *nva)
{
	nv_alloc_fini(nva);
	kmem_free(nva, sizeof (nv_alloc_t));
}
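/*
 * Illustrative only -- using a preallocated buffer for nvlist
 * construction.  The nv_alloc_t must be created ahead of time
 * (fm_nva_xcreate() itself performs a sleeping allocation); the buffer
 * name and size here are hypothetical:
 *
 *	static char ebuf[ERPT_DATA_SZ];
 *
 *	nv_alloc_t *nva = fm_nva_xcreate(ebuf, sizeof (ebuf));
 *	nvlist_t *nvl = (nva != NULL) ? fm_nvlist_create(nva) : NULL;
 *	...
 *	fm_nvlist_destroy(nvl, FM_NVA_RETAIN);
 *	fm_nva_xdestroy(nva);
 *
 * The fixed buffer itself (here static) is not freed by fm_nva_xdestroy().
 */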
/*
 * Create a new nv list.  A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created.  The newly created nv list is managed by the operations
 * installed in nva.  If nva is NULL, the default FMA nva operations are
 * installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring.  Otherwise, this function may
 * be called from other kernel contexts as long as a valid nva created via
 * fm_nva_xcreate() is supplied.
 */
nvlist_t *
fm_nvlist_create(nv_alloc_t *nva)
{
	int hdl_alloced = 0;
	nvlist_t *nvl;
	nv_alloc_t *nvhdl;

	if (nva == NULL) {
		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
			kmem_free(nvhdl, sizeof (nv_alloc_t));
			return (NULL);
		}
		hdl_alloced = 1;
	} else {
		nvhdl = nva;
	}

	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
		if (hdl_alloced) {
			nv_alloc_fini(nvhdl);
			kmem_free(nvhdl, sizeof (nv_alloc_t));
		}
		return (NULL);
	}

	return (nvl);
}

/*
 * Destroy a previously allocated nvlist structure.  flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
 */
void
fm_nvlist_destroy(nvlist_t *nvl, int flag)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);

	nvlist_free(nvl);

	if (nva != NULL) {
		if (flag == FM_NVA_FREE)
			fm_nva_xdestroy(nva);
	}
}

int
i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
{
	int nelem, ret = 0;
	data_type_t type;

	while (ret == 0 && name != NULL) {
		type = va_arg(ap, data_type_t);
		switch (type) {
		case DATA_TYPE_BYTE:
			ret = nvlist_add_byte(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_BYTE_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_byte_array(payload, name,
			    va_arg(ap, uchar_t *), nelem);
			break;
		case DATA_TYPE_BOOLEAN_VALUE:
			ret = nvlist_add_boolean_value(payload, name,
			    va_arg(ap, boolean_t));
			break;
		case DATA_TYPE_BOOLEAN_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_boolean_array(payload, name,
			    va_arg(ap, boolean_t *), nelem);
			break;
		case DATA_TYPE_INT8:
			ret = nvlist_add_int8(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int8_array(payload, name,
			    va_arg(ap, int8_t *), nelem);
			break;
		case DATA_TYPE_UINT8:
			ret = nvlist_add_uint8(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint8_array(payload, name,
			    va_arg(ap, uint8_t *), nelem);
			break;
		case DATA_TYPE_INT16:
			ret = nvlist_add_int16(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int16_array(payload, name,
			    va_arg(ap, int16_t *), nelem);
			break;
		case DATA_TYPE_UINT16:
			ret = nvlist_add_uint16(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint16_array(payload, name,
			    va_arg(ap, uint16_t *), nelem);
			break;
		case DATA_TYPE_INT32:
			ret = nvlist_add_int32(payload, name,
			    va_arg(ap, int32_t));
			break;
		case DATA_TYPE_INT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int32_array(payload, name,
			    va_arg(ap, int32_t *), nelem);
			break;
		case DATA_TYPE_UINT32:
			ret = nvlist_add_uint32(payload, name,
			    va_arg(ap, uint32_t));
			break;
		case DATA_TYPE_UINT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint32_array(payload, name,
			    va_arg(ap, uint32_t *), nelem);
			break;
		case DATA_TYPE_INT64:
			ret = nvlist_add_int64(payload, name,
			    va_arg(ap, int64_t));
			break;
		case DATA_TYPE_INT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int64_array(payload, name,
			    va_arg(ap, int64_t *), nelem);
			break;
		case DATA_TYPE_UINT64:
			ret = nvlist_add_uint64(payload, name,
			    va_arg(ap, uint64_t));
			break;
		case DATA_TYPE_UINT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint64_array(payload, name,
			    va_arg(ap, uint64_t *), nelem);
			break;
		case DATA_TYPE_STRING:
			ret = nvlist_add_string(payload, name,
			    va_arg(ap, char *));
			break;
		case DATA_TYPE_STRING_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_string_array(payload, name,
			    va_arg(ap, char **), nelem);
			break;
		case DATA_TYPE_NVLIST:
			ret = nvlist_add_nvlist(payload, name,
			    va_arg(ap, nvlist_t *));
			break;
		case DATA_TYPE_NVLIST_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_nvlist_array(payload, name,
			    va_arg(ap, nvlist_t **), nelem);
			break;
		default:
			ret = EINVAL;
		}

		name = va_arg(ap, char *);
	}
	return (ret);
}
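/*
 * The variable argument list consumed above is a NULL-terminated
 * sequence of (name, DATA_TYPE_*, [nelem,] value) tuples, where nelem
 * is present only for the *_ARRAY types.
 */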
void
fm_payload_set(nvlist_t *payload, ...)
{
	int ret;
	const char *name;
	va_list ap;

	va_start(ap, payload);
	name = va_arg(ap, char *);
	ret = i_fm_payload_set(payload, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
}
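/*
 * Illustrative only -- a sketch of the varargs encoding (the payload
 * names and locals here are hypothetical):
 *
 *	fm_payload_set(ereport,
 *	    "vdev_guid", DATA_TYPE_UINT64, vdev_guid,
 *	    "vdev_path", DATA_TYPE_STRING, vdev_path,
 *	    NULL);
 *
 * fm_ereport_set() below accepts the same tuple encoding after its
 * fixed arguments.
 */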
/*
 * Set-up and validate the members of an ereport event according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	class			string		ereport
 *	version			uint8_t		0
 *	ena			uint64_t	<ena>
 *	detector		nvlist_t	<detector>
 *	ereport-payload		nvlist_t	<var args>
 *
 * We don't actually add a 'version' member to the payload.  Really,
 * the version quoted to us by our caller is that of the category 1
 * "ereport" event class (and we require FM_EREPORT_VERS0) but
 * the payload version of the actual leaf class event under construction
 * may be something else.  Callers should supply a version in the varargs,
 * or (better) we could take two version arguments - one for the
 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
 * for the leaf class.
 */
void
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    uint64_t ena, const nvlist_t *detector, ...)
{
	char ereport_class[FM_MAX_CLASS];
	const char *name;
	va_list ap;
	int ret;

	if (version != FM_EREPORT_VERS0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
	    FM_EREPORT_CLASS, erpt_class);
	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
	    (nvlist_t *)detector) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	va_start(ap, detector);
	name = va_arg(ap, const char *);
	ret = i_fm_payload_set(ereport, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}

/*
 * Set-up and validate the members of an hc fmri according to:
 *
 *	Member name		Type		Value
 *	===================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	hc-name			string		<name>
 *	hc-id			string		<id>
 *
 * Note that auth and hc-id are optional members.
 */

#define	HC_MAXPAIRS	20
#define	HC_MAXNAMELEN	50

static int
fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
{
	if (version != FM_HC_SCHEME_VERSION) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	return (1);
}

void
fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	va_list ap;
	int i;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = 0; i < npairs; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];

		(void) snprintf(idstr, sizeof (idstr), "%u", id);

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
	va_end(ap);

	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

	for (i = 0; i < npairs; i++)
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}
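/*
 * Illustrative only -- building a two-level hc FMRI with the interface
 * above (the pair names are hypothetical):
 *
 *	fm_fmri_hc_set(fmri, FM_HC_SCHEME_VERSION, auth, NULL, 2,
 *	    "motherboard", 0,
 *	    "cpu", 1);
 *
 * which yields an hc-list of { hc-name="motherboard", hc-id="0" },
 * { hc-name="cpu", hc-id="1" }.
 */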
void
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	nvlist_t **hcl;
	uint_t n;
	int i, j;
	va_list ap;
	char *hcname, *hcid;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	/*
	 * Copy the bboard nvpairs to the pairs array.
	 */
	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
	    != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < n; i++) {
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
		    &hcname) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}

	/*
	 * Create the pairs from the passed-in varargs.
	 */
	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = n; i < npairs + n; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];
		(void) snprintf(idstr, sizeof (idstr), "%u", id);
		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
	va_end(ap);

	/*
	 * Create the fmri hc list.
	 */
	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
	    npairs + n) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < npairs + n; i++) {
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
	}

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
}
/*
 * Set-up and validate the members of a dev fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	devpath			string		<devpath>
 *	[devid]			string		<devid>
 *	[target-port-l0id]	string		<target-port-lun0-id>
 *
 * Note that auth and devid are optional members.
 */
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
{
	int err = 0;

	if (version != DEV_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);

	if (auth != NULL) {
		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth);
	}

	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

	if (devid != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

	if (tpl0 != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);

	if (err)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
}

/*
 * Set-up and validate the members of a cpu fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	cpuid			uint32_t	<cpu_id>
 *	cpumask			uint8_t		<cpu_mask>
 *	serial			uint64_t	<serial_id>
 *
 * Note that auth, cpumask, and serial are optional members.
 */
void
fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
{
	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;

	if (version < CPU_SCHEME_VERSION1) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
	    FM_FMRI_SCHEME_CPU) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0)
		atomic_inc_64(failedp);

	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
		atomic_inc_64(failedp);

	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
	    *cpu_maskp) != 0)
		atomic_inc_64(failedp);

	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
		atomic_inc_64(failedp);
}
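/*
 * Illustrative only -- a minimal cpu FMRI (authority and cpumask
 * omitted, serial_id hypothetical):
 *
 *	fm_fmri_cpu_set(fmri, CPU_SCHEME_VERSION1, NULL, cpu_id,
 *	    NULL, serial_id);
 *
 * Note that although the comment above lists serial as optional, this
 * version of the code bumps the fmri-set-failed kstat when serial_idp
 * is NULL (see the final test above).
 */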
/*
 * Set-up and validate the members of a mem fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>		[optional]
 *	unum			string		<unum>
 *	serial			string		<serial>	[optional*]
 *	offset			uint64_t	<offset>	[optional]
 *
 *	* serial is required if offset is present
 */
void
fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    const char *unum, const char *serial, uint64_t offset)
{
	if (version != MEM_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (!serial && (offset != (uint64_t)-1)) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (auth != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}

	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (serial != NULL) {
		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
		    (char **)&serial, 1) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
		    FM_FMRI_MEM_OFFSET, offset) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

void
fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
    uint64_t vdev_guid)
{
	if (version != ZFS_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (vdev_guid != 0) {
		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

uint64_t
fm_ena_increment(uint64_t ena)
{
	uint64_t new_ena;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
		break;
	case FM_ENA_FMT2:
		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
		break;
	default:
		new_ena = 0;
	}

	return (new_ena);
}

uint64_t
fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
{
	uint64_t ena = 0;

	switch (format) {
	case FM_ENA_FMT1:
		if (timestamp) {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((timestamp << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		} else {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		}
		break;
	case FM_ENA_FMT2:
		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
		break;
	default:
		break;
	}

	return (ena);
}

uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
	uint64_t ena;

	kpreempt_disable();
	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
	kpreempt_enable();

	return (ena);
}
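/*
 * Illustrative only -- an ENA round trip using the accessors below:
 *
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *	uint64_t t = fm_ena_time_get(ena);	(gethrtime()-based stamp)
 *	uchar_t fmt = fm_ena_format_get(ena);	(FM_ENA_FMT1)
 *
 * Passing a timestamp of 0 with format 1 makes fm_ena_generate_cpu()
 * substitute gethrtime(), as shown above.
 */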
uint64_t
fm_ena_generation_get(uint64_t ena)
{
	uint64_t gen;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
		break;
	case FM_ENA_FMT2:
		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
		break;
	default:
		gen = 0;
		break;
	}

	return (gen);
}

uchar_t
fm_ena_format_get(uint64_t ena)
{
	return (ENA_FORMAT(ena));
}

uint64_t
fm_ena_id_get(uint64_t ena)
{
	uint64_t id;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
		break;
	case FM_ENA_FMT2:
		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
		break;
	default:
		id = 0;
	}

	return (id);
}

uint64_t
fm_ena_time_get(uint64_t ena)
{
	uint64_t time;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
		break;
	case FM_ENA_FMT2:
		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
		break;
	default:
		time = 0;
	}

	return (time);
}

#ifdef _KERNEL
/*
 * Helper function to increment ereport dropped count.  Used by the event
 * rate limiting code to give feedback to the user about how many events were
 * rate limited by including them in the 'dropped' count.
 */
void
fm_erpt_dropped_increment(void)
{
	atomic_inc_64(&ratelimit_dropped);
}

void
fm_init(void)
{
	zevent_len_cur = 0;
	zevent_flags = 0;

	/* Initialize zevent allocation and generation kstats */
	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);

	if (fm_ksp != NULL) {
		fm_ksp->ks_data = &erpt_kstat_data;
		kstat_install(fm_ksp);
	} else {
		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
	}

	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zevent_list, sizeof (zevent_t),
	    offsetof(zevent_t, ev_node));
	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);

	zfs_ereport_init();
}

void
fm_fini(void)
{
	int count;

	zfs_ereport_fini();

	zfs_zevent_drain_all(&count);

	mutex_enter(&zevent_lock);
	cv_broadcast(&zevent_cv);

	zevent_flags |= ZEVENT_SHUTDOWN;
	while (zevent_waiters > 0) {
		mutex_exit(&zevent_lock);
		schedule();
		mutex_enter(&zevent_lock);
	}
	mutex_exit(&zevent_lock);

	cv_destroy(&zevent_cv);
	list_destroy(&zevent_list);
	mutex_destroy(&zevent_lock);

	if (fm_ksp != NULL) {
		kstat_delete(fm_ksp);
		fm_ksp = NULL;
	}
}
#endif /* _KERNEL */

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, INT, ZMOD_RW,
	"Max event queue length");