1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy /* 22eda14cbcSMatt Macy * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23eda14cbcSMatt Macy * 24eda14cbcSMatt Macy * Copyright (c) 2016, Intel Corporation. 25eda14cbcSMatt Macy * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com> 26eda14cbcSMatt Macy */ 27eda14cbcSMatt Macy 28eda14cbcSMatt Macy /* 29eda14cbcSMatt Macy * The ZFS retire agent is responsible for managing hot spares across all pools. 30eda14cbcSMatt Macy * When we see a device fault or a device removal, we try to open the associated 31eda14cbcSMatt Macy * pool and look for any hot spares. We iterate over any available hot spares 32eda14cbcSMatt Macy * and attempt a 'zpool replace' for each one. 33eda14cbcSMatt Macy * 34eda14cbcSMatt Macy * For vdevs diagnosed as faulty, the agent is also responsible for proactively 35eda14cbcSMatt Macy * marking the vdev FAULTY (for I/O errors) or DEGRADED (for checksum errors). 36eda14cbcSMatt Macy */ 37eda14cbcSMatt Macy 38eda14cbcSMatt Macy #include <sys/fs/zfs.h> 39eda14cbcSMatt Macy #include <sys/fm/protocol.h> 40eda14cbcSMatt Macy #include <sys/fm/fs/zfs.h> 413ff01b23SMartin Matuska #include <libzutil.h> 42eda14cbcSMatt Macy #include <libzfs.h> 43eda14cbcSMatt Macy #include <string.h> 4453b70c86SMartin Matuska #include <libgen.h> 45eda14cbcSMatt Macy 46eda14cbcSMatt Macy #include "zfs_agents.h" 47eda14cbcSMatt Macy #include "fmd_api.h" 48eda14cbcSMatt Macy 49eda14cbcSMatt Macy 50eda14cbcSMatt Macy typedef struct zfs_retire_repaired { 51eda14cbcSMatt Macy struct zfs_retire_repaired *zrr_next; 52eda14cbcSMatt Macy uint64_t zrr_pool; 53eda14cbcSMatt Macy uint64_t zrr_vdev; 54eda14cbcSMatt Macy } zfs_retire_repaired_t; 55eda14cbcSMatt Macy 56eda14cbcSMatt Macy typedef struct zfs_retire_data { 57eda14cbcSMatt Macy libzfs_handle_t *zrd_hdl; 58eda14cbcSMatt Macy zfs_retire_repaired_t *zrd_repaired; 59eda14cbcSMatt Macy } zfs_retire_data_t; 60eda14cbcSMatt Macy 61eda14cbcSMatt Macy static void 62eda14cbcSMatt Macy zfs_retire_clear_data(fmd_hdl_t *hdl, zfs_retire_data_t *zdp) 63eda14cbcSMatt Macy { 64eda14cbcSMatt Macy zfs_retire_repaired_t *zrp; 65eda14cbcSMatt Macy 66eda14cbcSMatt Macy while ((zrp = zdp->zrd_repaired) != NULL) { 67eda14cbcSMatt Macy zdp->zrd_repaired = zrp->zrr_next; 68eda14cbcSMatt Macy fmd_hdl_free(hdl, zrp, sizeof (zfs_retire_repaired_t)); 69eda14cbcSMatt Macy } 70eda14cbcSMatt Macy } 71eda14cbcSMatt Macy 72eda14cbcSMatt Macy /* 73eda14cbcSMatt Macy * Find a pool with a matching GUID. 74eda14cbcSMatt Macy */ 75eda14cbcSMatt Macy typedef struct find_cbdata { 76eda14cbcSMatt Macy uint64_t cb_guid; 77eda14cbcSMatt Macy zpool_handle_t *cb_zhp; 78eda14cbcSMatt Macy nvlist_t *cb_vdev; 7915f0b8c3SMartin Matuska uint64_t cb_vdev_guid; 8015f0b8c3SMartin Matuska uint64_t cb_num_spares; 81eda14cbcSMatt Macy } find_cbdata_t; 82eda14cbcSMatt Macy 83eda14cbcSMatt Macy static int 84eda14cbcSMatt Macy find_pool(zpool_handle_t *zhp, void *data) 85eda14cbcSMatt Macy { 86eda14cbcSMatt Macy find_cbdata_t *cbp = data; 87eda14cbcSMatt Macy 88eda14cbcSMatt Macy if (cbp->cb_guid == 89eda14cbcSMatt Macy zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL)) { 90eda14cbcSMatt Macy cbp->cb_zhp = zhp; 91eda14cbcSMatt Macy return (1); 92eda14cbcSMatt Macy } 93eda14cbcSMatt Macy 94eda14cbcSMatt Macy zpool_close(zhp); 95eda14cbcSMatt Macy return (0); 96eda14cbcSMatt Macy } 97eda14cbcSMatt Macy 98eda14cbcSMatt Macy /* 99eda14cbcSMatt Macy * Find a vdev within a tree with a matching GUID. 100eda14cbcSMatt Macy */ 101eda14cbcSMatt Macy static nvlist_t * 102eda14cbcSMatt Macy find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid) 103eda14cbcSMatt Macy { 104eda14cbcSMatt Macy uint64_t guid; 105eda14cbcSMatt Macy nvlist_t **child; 106eda14cbcSMatt Macy uint_t c, children; 107eda14cbcSMatt Macy nvlist_t *ret; 108eda14cbcSMatt Macy 109eda14cbcSMatt Macy if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && 110eda14cbcSMatt Macy guid == search_guid) { 111eda14cbcSMatt Macy fmd_hdl_debug(fmd_module_hdl("zfs-retire"), 112eda14cbcSMatt Macy "matched vdev %llu", guid); 113eda14cbcSMatt Macy return (nv); 114eda14cbcSMatt Macy } 115eda14cbcSMatt Macy 116eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 117eda14cbcSMatt Macy &child, &children) != 0) 118eda14cbcSMatt Macy return (NULL); 119eda14cbcSMatt Macy 120eda14cbcSMatt Macy for (c = 0; c < children; c++) { 121eda14cbcSMatt Macy if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL) 122eda14cbcSMatt Macy return (ret); 123eda14cbcSMatt Macy } 124eda14cbcSMatt Macy 125eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, 126eda14cbcSMatt Macy &child, &children) != 0) 127eda14cbcSMatt Macy return (NULL); 128eda14cbcSMatt Macy 129eda14cbcSMatt Macy for (c = 0; c < children; c++) { 130eda14cbcSMatt Macy if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL) 131eda14cbcSMatt Macy return (ret); 132eda14cbcSMatt Macy } 133eda14cbcSMatt Macy 134eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, 135eda14cbcSMatt Macy &child, &children) != 0) 136eda14cbcSMatt Macy return (NULL); 137eda14cbcSMatt Macy 138eda14cbcSMatt Macy for (c = 0; c < children; c++) { 139eda14cbcSMatt Macy if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL) 140eda14cbcSMatt Macy return (ret); 141eda14cbcSMatt Macy } 142eda14cbcSMatt Macy 143eda14cbcSMatt Macy return (NULL); 144eda14cbcSMatt Macy } 145eda14cbcSMatt Macy 14615f0b8c3SMartin Matuska static int 14715f0b8c3SMartin Matuska remove_spares(zpool_handle_t *zhp, void *data) 14815f0b8c3SMartin Matuska { 14915f0b8c3SMartin Matuska nvlist_t *config, *nvroot; 15015f0b8c3SMartin Matuska nvlist_t **spares; 15115f0b8c3SMartin Matuska uint_t nspares; 15215f0b8c3SMartin Matuska char *devname; 15315f0b8c3SMartin Matuska find_cbdata_t *cbp = data; 15415f0b8c3SMartin Matuska uint64_t spareguid = 0; 15515f0b8c3SMartin Matuska vdev_stat_t *vs; 15615f0b8c3SMartin Matuska unsigned int c; 15715f0b8c3SMartin Matuska 15815f0b8c3SMartin Matuska config = zpool_get_config(zhp, NULL); 15915f0b8c3SMartin Matuska if (nvlist_lookup_nvlist(config, 16015f0b8c3SMartin Matuska ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) { 16115f0b8c3SMartin Matuska zpool_close(zhp); 16215f0b8c3SMartin Matuska return (0); 16315f0b8c3SMartin Matuska } 16415f0b8c3SMartin Matuska 16515f0b8c3SMartin Matuska if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 16615f0b8c3SMartin Matuska &spares, &nspares) != 0) { 16715f0b8c3SMartin Matuska zpool_close(zhp); 16815f0b8c3SMartin Matuska return (0); 16915f0b8c3SMartin Matuska } 17015f0b8c3SMartin Matuska 17115f0b8c3SMartin Matuska for (int i = 0; i < nspares; i++) { 17215f0b8c3SMartin Matuska if (nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID, 17315f0b8c3SMartin Matuska &spareguid) == 0 && spareguid == cbp->cb_vdev_guid) { 17415f0b8c3SMartin Matuska devname = zpool_vdev_name(NULL, zhp, spares[i], 17515f0b8c3SMartin Matuska B_FALSE); 17615f0b8c3SMartin Matuska nvlist_lookup_uint64_array(spares[i], 17715f0b8c3SMartin Matuska ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c); 17815f0b8c3SMartin Matuska if (vs->vs_state != VDEV_STATE_REMOVED && 17915f0b8c3SMartin Matuska zpool_vdev_remove_wanted(zhp, devname) == 0) 18015f0b8c3SMartin Matuska cbp->cb_num_spares++; 18115f0b8c3SMartin Matuska break; 18215f0b8c3SMartin Matuska } 18315f0b8c3SMartin Matuska } 18415f0b8c3SMartin Matuska 18515f0b8c3SMartin Matuska zpool_close(zhp); 18615f0b8c3SMartin Matuska return (0); 18715f0b8c3SMartin Matuska } 18815f0b8c3SMartin Matuska 18915f0b8c3SMartin Matuska /* 19015f0b8c3SMartin Matuska * Given a vdev guid, find and remove all spares associated with it. 19115f0b8c3SMartin Matuska */ 19215f0b8c3SMartin Matuska static int 19315f0b8c3SMartin Matuska find_and_remove_spares(libzfs_handle_t *zhdl, uint64_t vdev_guid) 19415f0b8c3SMartin Matuska { 19515f0b8c3SMartin Matuska find_cbdata_t cb; 19615f0b8c3SMartin Matuska 19715f0b8c3SMartin Matuska cb.cb_num_spares = 0; 19815f0b8c3SMartin Matuska cb.cb_vdev_guid = vdev_guid; 19915f0b8c3SMartin Matuska zpool_iter(zhdl, remove_spares, &cb); 20015f0b8c3SMartin Matuska 20115f0b8c3SMartin Matuska return (cb.cb_num_spares); 20215f0b8c3SMartin Matuska } 20315f0b8c3SMartin Matuska 204eda14cbcSMatt Macy /* 205eda14cbcSMatt Macy * Given a (pool, vdev) GUID pair, find the matching pool and vdev. 206eda14cbcSMatt Macy */ 207eda14cbcSMatt Macy static zpool_handle_t * 208eda14cbcSMatt Macy find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid, 209eda14cbcSMatt Macy nvlist_t **vdevp) 210eda14cbcSMatt Macy { 211eda14cbcSMatt Macy find_cbdata_t cb; 212eda14cbcSMatt Macy zpool_handle_t *zhp; 213eda14cbcSMatt Macy nvlist_t *config, *nvroot; 214eda14cbcSMatt Macy 215eda14cbcSMatt Macy /* 216eda14cbcSMatt Macy * Find the corresponding pool and make sure the vdev still exists. 217eda14cbcSMatt Macy */ 218eda14cbcSMatt Macy cb.cb_guid = pool_guid; 219eda14cbcSMatt Macy if (zpool_iter(zhdl, find_pool, &cb) != 1) 220eda14cbcSMatt Macy return (NULL); 221eda14cbcSMatt Macy 222eda14cbcSMatt Macy zhp = cb.cb_zhp; 223eda14cbcSMatt Macy config = zpool_get_config(zhp, NULL); 224eda14cbcSMatt Macy if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 225eda14cbcSMatt Macy &nvroot) != 0) { 226eda14cbcSMatt Macy zpool_close(zhp); 227eda14cbcSMatt Macy return (NULL); 228eda14cbcSMatt Macy } 229eda14cbcSMatt Macy 230eda14cbcSMatt Macy if (vdev_guid != 0) { 231eda14cbcSMatt Macy if ((*vdevp = find_vdev(zhdl, nvroot, vdev_guid)) == NULL) { 232eda14cbcSMatt Macy zpool_close(zhp); 233eda14cbcSMatt Macy return (NULL); 234eda14cbcSMatt Macy } 235eda14cbcSMatt Macy } 236eda14cbcSMatt Macy 237eda14cbcSMatt Macy return (zhp); 238eda14cbcSMatt Macy } 239eda14cbcSMatt Macy 240eda14cbcSMatt Macy /* 241eda14cbcSMatt Macy * Given a vdev, attempt to replace it with every known spare until one 242eda14cbcSMatt Macy * succeeds or we run out of devices to try. 243eda14cbcSMatt Macy * Return whether we were successful or not in replacing the device. 244eda14cbcSMatt Macy */ 245eda14cbcSMatt Macy static boolean_t 246eda14cbcSMatt Macy replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) 247eda14cbcSMatt Macy { 248eda14cbcSMatt Macy nvlist_t *config, *nvroot, *replacement; 249eda14cbcSMatt Macy nvlist_t **spares; 250eda14cbcSMatt Macy uint_t s, nspares; 251eda14cbcSMatt Macy char *dev_name; 252eda14cbcSMatt Macy zprop_source_t source; 253eda14cbcSMatt Macy int ashift; 254eda14cbcSMatt Macy 255eda14cbcSMatt Macy config = zpool_get_config(zhp, NULL); 256eda14cbcSMatt Macy if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 257eda14cbcSMatt Macy &nvroot) != 0) 258eda14cbcSMatt Macy return (B_FALSE); 259eda14cbcSMatt Macy 260eda14cbcSMatt Macy /* 261eda14cbcSMatt Macy * Find out if there are any hot spares available in the pool. 262eda14cbcSMatt Macy */ 263eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 264eda14cbcSMatt Macy &spares, &nspares) != 0) 265eda14cbcSMatt Macy return (B_FALSE); 266eda14cbcSMatt Macy 267eda14cbcSMatt Macy /* 268eda14cbcSMatt Macy * lookup "ashift" pool property, we may need it for the replacement 269eda14cbcSMatt Macy */ 270eda14cbcSMatt Macy ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, &source); 271eda14cbcSMatt Macy 272eda14cbcSMatt Macy replacement = fmd_nvl_alloc(hdl, FMD_SLEEP); 273eda14cbcSMatt Macy 274eda14cbcSMatt Macy (void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE, 275eda14cbcSMatt Macy VDEV_TYPE_ROOT); 276eda14cbcSMatt Macy 277eda14cbcSMatt Macy dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); 278eda14cbcSMatt Macy 279eda14cbcSMatt Macy /* 280eda14cbcSMatt Macy * Try to replace each spare, ending when we successfully 281eda14cbcSMatt Macy * replace it. 282eda14cbcSMatt Macy */ 283eda14cbcSMatt Macy for (s = 0; s < nspares; s++) { 2847877fdebSMatt Macy boolean_t rebuild = B_FALSE; 2852a58b312SMartin Matuska const char *spare_name, *type; 286eda14cbcSMatt Macy 287eda14cbcSMatt Macy if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH, 288eda14cbcSMatt Macy &spare_name) != 0) 289eda14cbcSMatt Macy continue; 290eda14cbcSMatt Macy 2917877fdebSMatt Macy /* prefer sequential resilvering for distributed spares */ 2927877fdebSMatt Macy if ((nvlist_lookup_string(spares[s], ZPOOL_CONFIG_TYPE, 2937877fdebSMatt Macy &type) == 0) && strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0) 2947877fdebSMatt Macy rebuild = B_TRUE; 2957877fdebSMatt Macy 296eda14cbcSMatt Macy /* if set, add the "ashift" pool property to the spare nvlist */ 297eda14cbcSMatt Macy if (source != ZPROP_SRC_DEFAULT) 298eda14cbcSMatt Macy (void) nvlist_add_uint64(spares[s], 299eda14cbcSMatt Macy ZPOOL_CONFIG_ASHIFT, ashift); 300eda14cbcSMatt Macy 301eda14cbcSMatt Macy (void) nvlist_add_nvlist_array(replacement, 302681ce946SMartin Matuska ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)&spares[s], 1); 303eda14cbcSMatt Macy 304eda14cbcSMatt Macy fmd_hdl_debug(hdl, "zpool_vdev_replace '%s' with spare '%s'", 3053ff01b23SMartin Matuska dev_name, zfs_basename(spare_name)); 306eda14cbcSMatt Macy 307eda14cbcSMatt Macy if (zpool_vdev_attach(zhp, dev_name, spare_name, 3087877fdebSMatt Macy replacement, B_TRUE, rebuild) == 0) { 309eda14cbcSMatt Macy free(dev_name); 310eda14cbcSMatt Macy nvlist_free(replacement); 311eda14cbcSMatt Macy return (B_TRUE); 312eda14cbcSMatt Macy } 313eda14cbcSMatt Macy } 314eda14cbcSMatt Macy 315eda14cbcSMatt Macy free(dev_name); 316eda14cbcSMatt Macy nvlist_free(replacement); 317eda14cbcSMatt Macy 318eda14cbcSMatt Macy return (B_FALSE); 319eda14cbcSMatt Macy } 320eda14cbcSMatt Macy 321eda14cbcSMatt Macy /* 322eda14cbcSMatt Macy * Repair this vdev if we had diagnosed a 'fault.fs.zfs.device' and 323eda14cbcSMatt Macy * ASRU is now usable. ZFS has found the device to be present and 324eda14cbcSMatt Macy * functioning. 325eda14cbcSMatt Macy */ 326eda14cbcSMatt Macy static void 327eda14cbcSMatt Macy zfs_vdev_repair(fmd_hdl_t *hdl, nvlist_t *nvl) 328eda14cbcSMatt Macy { 329eda14cbcSMatt Macy zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl); 330eda14cbcSMatt Macy zfs_retire_repaired_t *zrp; 331eda14cbcSMatt Macy uint64_t pool_guid, vdev_guid; 332eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, 333eda14cbcSMatt Macy &pool_guid) != 0 || nvlist_lookup_uint64(nvl, 334eda14cbcSMatt Macy FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0) 335eda14cbcSMatt Macy return; 336eda14cbcSMatt Macy 337eda14cbcSMatt Macy /* 338eda14cbcSMatt Macy * Before checking the state of the ASRU, go through and see if we've 339eda14cbcSMatt Macy * already made an attempt to repair this ASRU. This list is cleared 340eda14cbcSMatt Macy * whenever we receive any kind of list event, and is designed to 341eda14cbcSMatt Macy * prevent us from generating a feedback loop when we attempt repairs 342eda14cbcSMatt Macy * against a faulted pool. The problem is that checking the unusable 343eda14cbcSMatt Macy * state of the ASRU can involve opening the pool, which can post 344eda14cbcSMatt Macy * statechange events but otherwise leave the pool in the faulted 345eda14cbcSMatt Macy * state. This list allows us to detect when a statechange event is 346eda14cbcSMatt Macy * due to our own request. 347eda14cbcSMatt Macy */ 348eda14cbcSMatt Macy for (zrp = zdp->zrd_repaired; zrp != NULL; zrp = zrp->zrr_next) { 349eda14cbcSMatt Macy if (zrp->zrr_pool == pool_guid && 350eda14cbcSMatt Macy zrp->zrr_vdev == vdev_guid) 351eda14cbcSMatt Macy return; 352eda14cbcSMatt Macy } 353eda14cbcSMatt Macy 354eda14cbcSMatt Macy zrp = fmd_hdl_alloc(hdl, sizeof (zfs_retire_repaired_t), FMD_SLEEP); 355eda14cbcSMatt Macy zrp->zrr_next = zdp->zrd_repaired; 356eda14cbcSMatt Macy zrp->zrr_pool = pool_guid; 357eda14cbcSMatt Macy zrp->zrr_vdev = vdev_guid; 358eda14cbcSMatt Macy zdp->zrd_repaired = zrp; 359eda14cbcSMatt Macy 360eda14cbcSMatt Macy fmd_hdl_debug(hdl, "marking repaired vdev %llu on pool %llu", 361eda14cbcSMatt Macy vdev_guid, pool_guid); 362eda14cbcSMatt Macy } 363eda14cbcSMatt Macy 364eda14cbcSMatt Macy static void 365eda14cbcSMatt Macy zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 366eda14cbcSMatt Macy const char *class) 367eda14cbcSMatt Macy { 368e92ffd9bSMartin Matuska (void) ep; 369eda14cbcSMatt Macy uint64_t pool_guid, vdev_guid; 370eda14cbcSMatt Macy zpool_handle_t *zhp; 371eda14cbcSMatt Macy nvlist_t *resource, *fault; 372eda14cbcSMatt Macy nvlist_t **faults; 373eda14cbcSMatt Macy uint_t f, nfaults; 374eda14cbcSMatt Macy zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl); 375eda14cbcSMatt Macy libzfs_handle_t *zhdl = zdp->zrd_hdl; 376eda14cbcSMatt Macy boolean_t fault_device, degrade_device; 377eda14cbcSMatt Macy boolean_t is_repair; 37815f0b8c3SMartin Matuska boolean_t l2arc = B_FALSE; 37915f0b8c3SMartin Matuska boolean_t spare = B_FALSE; 3802a58b312SMartin Matuska const char *scheme; 381eda14cbcSMatt Macy nvlist_t *vdev = NULL; 3822a58b312SMartin Matuska const char *uuid; 383eda14cbcSMatt Macy int repair_done = 0; 384eda14cbcSMatt Macy boolean_t retire; 385eda14cbcSMatt Macy boolean_t is_disk; 386eda14cbcSMatt Macy vdev_aux_t aux; 387eda14cbcSMatt Macy uint64_t state = 0; 388be181ee2SMartin Matuska vdev_stat_t *vs; 389be181ee2SMartin Matuska unsigned int c; 390eda14cbcSMatt Macy 391eda14cbcSMatt Macy fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class); 392eda14cbcSMatt Macy 393be181ee2SMartin Matuska (void) nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, 394be181ee2SMartin Matuska &state); 395eda14cbcSMatt Macy 396eda14cbcSMatt Macy /* 397eda14cbcSMatt Macy * If this is a resource notifying us of device removal then simply 398eda14cbcSMatt Macy * check for an available spare and continue unless the device is a 399eda14cbcSMatt Macy * l2arc vdev, in which case we just offline it. 400eda14cbcSMatt Macy */ 401eda14cbcSMatt Macy if (strcmp(class, "resource.fs.zfs.removed") == 0 || 402eda14cbcSMatt Macy (strcmp(class, "resource.fs.zfs.statechange") == 0 && 403184c1b94SMartin Matuska (state == VDEV_STATE_REMOVED || state == VDEV_STATE_FAULTED))) { 4042a58b312SMartin Matuska const char *devtype; 405eda14cbcSMatt Macy char *devname; 406eda14cbcSMatt Macy 40715f0b8c3SMartin Matuska if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, 40815f0b8c3SMartin Matuska &devtype) == 0) { 40915f0b8c3SMartin Matuska if (strcmp(devtype, VDEV_TYPE_SPARE) == 0) 41015f0b8c3SMartin Matuska spare = B_TRUE; 41115f0b8c3SMartin Matuska else if (strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) 41215f0b8c3SMartin Matuska l2arc = B_TRUE; 41315f0b8c3SMartin Matuska } 41415f0b8c3SMartin Matuska 41515f0b8c3SMartin Matuska if (nvlist_lookup_uint64(nvl, 41615f0b8c3SMartin Matuska FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0) 41715f0b8c3SMartin Matuska return; 41815f0b8c3SMartin Matuska 419315ee00fSMartin Matuska if (vdev_guid == 0) { 420315ee00fSMartin Matuska fmd_hdl_debug(hdl, "Got a zero GUID"); 421315ee00fSMartin Matuska return; 422315ee00fSMartin Matuska } 423315ee00fSMartin Matuska 42415f0b8c3SMartin Matuska if (spare) { 42515f0b8c3SMartin Matuska int nspares = find_and_remove_spares(zhdl, vdev_guid); 42615f0b8c3SMartin Matuska fmd_hdl_debug(hdl, "%d spares removed", nspares); 42715f0b8c3SMartin Matuska return; 42815f0b8c3SMartin Matuska } 42915f0b8c3SMartin Matuska 430eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, 43115f0b8c3SMartin Matuska &pool_guid) != 0) 432eda14cbcSMatt Macy return; 433eda14cbcSMatt Macy 434eda14cbcSMatt Macy if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, 435eda14cbcSMatt Macy &vdev)) == NULL) 436eda14cbcSMatt Macy return; 437eda14cbcSMatt Macy 438eda14cbcSMatt Macy devname = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); 439eda14cbcSMatt Macy 440be181ee2SMartin Matuska nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, 441be181ee2SMartin Matuska (uint64_t **)&vs, &c); 442be181ee2SMartin Matuska 443be181ee2SMartin Matuska /* 444be181ee2SMartin Matuska * If state removed is requested for already removed vdev, 445be181ee2SMartin Matuska * its a loopback event from spa_async_remove(). Just 446be181ee2SMartin Matuska * ignore it. 447be181ee2SMartin Matuska */ 448be181ee2SMartin Matuska if (vs->vs_state == VDEV_STATE_REMOVED && 449be181ee2SMartin Matuska state == VDEV_STATE_REMOVED) 450be181ee2SMartin Matuska return; 451be181ee2SMartin Matuska 452be181ee2SMartin Matuska /* Remove the vdev since device is unplugged */ 453c0a83fe0SMartin Matuska int remove_status = 0; 454be181ee2SMartin Matuska if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) { 455c0a83fe0SMartin Matuska remove_status = zpool_vdev_remove_wanted(zhp, devname); 456be181ee2SMartin Matuska fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'" 457c0a83fe0SMartin Matuska ", err:%d", devname, libzfs_errno(zhdl)); 458be181ee2SMartin Matuska } 459be181ee2SMartin Matuska 460be181ee2SMartin Matuska /* Replace the vdev with a spare if its not a l2arc */ 461c0a83fe0SMartin Matuska if (!l2arc && !remove_status && 462c0a83fe0SMartin Matuska (!fmd_prop_get_int32(hdl, "spare_on_remove") || 463be181ee2SMartin Matuska replace_with_spare(hdl, zhp, vdev) == B_FALSE)) { 464eda14cbcSMatt Macy /* Could not handle with spare */ 465eda14cbcSMatt Macy fmd_hdl_debug(hdl, "no spare for '%s'", devname); 466eda14cbcSMatt Macy } 467eda14cbcSMatt Macy 468eda14cbcSMatt Macy free(devname); 469eda14cbcSMatt Macy zpool_close(zhp); 470eda14cbcSMatt Macy return; 471eda14cbcSMatt Macy } 472eda14cbcSMatt Macy 473eda14cbcSMatt Macy if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) 474eda14cbcSMatt Macy return; 475eda14cbcSMatt Macy 476eda14cbcSMatt Macy /* 477180f8225SMatt Macy * Note: on Linux statechange events are more than just 478eda14cbcSMatt Macy * healthy ones so we need to confirm the actual state value. 479eda14cbcSMatt Macy */ 480eda14cbcSMatt Macy if (strcmp(class, "resource.fs.zfs.statechange") == 0 && 481eda14cbcSMatt Macy state == VDEV_STATE_HEALTHY) { 482eda14cbcSMatt Macy zfs_vdev_repair(hdl, nvl); 483eda14cbcSMatt Macy return; 484eda14cbcSMatt Macy } 485eda14cbcSMatt Macy if (strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) { 486eda14cbcSMatt Macy zfs_vdev_repair(hdl, nvl); 487eda14cbcSMatt Macy return; 488eda14cbcSMatt Macy } 489eda14cbcSMatt Macy 490eda14cbcSMatt Macy zfs_retire_clear_data(hdl, zdp); 491eda14cbcSMatt Macy 492eda14cbcSMatt Macy if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) 493eda14cbcSMatt Macy is_repair = B_TRUE; 494eda14cbcSMatt Macy else 495eda14cbcSMatt Macy is_repair = B_FALSE; 496eda14cbcSMatt Macy 497eda14cbcSMatt Macy /* 498eda14cbcSMatt Macy * We subscribe to zfs faults as well as all repair events. 499eda14cbcSMatt Macy */ 500eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 501eda14cbcSMatt Macy &faults, &nfaults) != 0) 502eda14cbcSMatt Macy return; 503eda14cbcSMatt Macy 504eda14cbcSMatt Macy for (f = 0; f < nfaults; f++) { 505eda14cbcSMatt Macy fault = faults[f]; 506eda14cbcSMatt Macy 507eda14cbcSMatt Macy fault_device = B_FALSE; 508eda14cbcSMatt Macy degrade_device = B_FALSE; 509eda14cbcSMatt Macy is_disk = B_FALSE; 510eda14cbcSMatt Macy 511eda14cbcSMatt Macy if (nvlist_lookup_boolean_value(fault, FM_SUSPECT_RETIRE, 512eda14cbcSMatt Macy &retire) == 0 && retire == 0) 513eda14cbcSMatt Macy continue; 514eda14cbcSMatt Macy 515eda14cbcSMatt Macy /* 516eda14cbcSMatt Macy * While we subscribe to fault.fs.zfs.*, we only take action 517eda14cbcSMatt Macy * for faults targeting a specific vdev (open failure or SERD 518eda14cbcSMatt Macy * failure). We also subscribe to fault.io.* events, so that 519eda14cbcSMatt Macy * faulty disks will be faulted in the ZFS configuration. 520eda14cbcSMatt Macy */ 521eda14cbcSMatt Macy if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.vdev.io")) { 522eda14cbcSMatt Macy fault_device = B_TRUE; 523eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, fault, 524eda14cbcSMatt Macy "fault.fs.zfs.vdev.checksum")) { 525eda14cbcSMatt Macy degrade_device = B_TRUE; 526eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, fault, 527eda14cbcSMatt Macy "fault.fs.zfs.device")) { 528eda14cbcSMatt Macy fault_device = B_FALSE; 529eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, fault, "fault.io.*")) { 530eda14cbcSMatt Macy is_disk = B_TRUE; 531eda14cbcSMatt Macy fault_device = B_TRUE; 532eda14cbcSMatt Macy } else { 533eda14cbcSMatt Macy continue; 534eda14cbcSMatt Macy } 535eda14cbcSMatt Macy 536eda14cbcSMatt Macy if (is_disk) { 537eda14cbcSMatt Macy continue; 538eda14cbcSMatt Macy } else { 539eda14cbcSMatt Macy /* 540eda14cbcSMatt Macy * This is a ZFS fault. Lookup the resource, and 541eda14cbcSMatt Macy * attempt to find the matching vdev. 542eda14cbcSMatt Macy */ 543eda14cbcSMatt Macy if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, 544eda14cbcSMatt Macy &resource) != 0 || 545eda14cbcSMatt Macy nvlist_lookup_string(resource, FM_FMRI_SCHEME, 546eda14cbcSMatt Macy &scheme) != 0) 547eda14cbcSMatt Macy continue; 548eda14cbcSMatt Macy 549eda14cbcSMatt Macy if (strcmp(scheme, FM_FMRI_SCHEME_ZFS) != 0) 550eda14cbcSMatt Macy continue; 551eda14cbcSMatt Macy 552eda14cbcSMatt Macy if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL, 553eda14cbcSMatt Macy &pool_guid) != 0) 554eda14cbcSMatt Macy continue; 555eda14cbcSMatt Macy 556eda14cbcSMatt Macy if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV, 557eda14cbcSMatt Macy &vdev_guid) != 0) { 558eda14cbcSMatt Macy if (is_repair) 559eda14cbcSMatt Macy vdev_guid = 0; 560eda14cbcSMatt Macy else 561eda14cbcSMatt Macy continue; 562eda14cbcSMatt Macy } 563eda14cbcSMatt Macy 564eda14cbcSMatt Macy if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, 565eda14cbcSMatt Macy &vdev)) == NULL) 566eda14cbcSMatt Macy continue; 567eda14cbcSMatt Macy 568eda14cbcSMatt Macy aux = VDEV_AUX_ERR_EXCEEDED; 569eda14cbcSMatt Macy } 570eda14cbcSMatt Macy 571eda14cbcSMatt Macy if (vdev_guid == 0) { 572eda14cbcSMatt Macy /* 573eda14cbcSMatt Macy * For pool-level repair events, clear the entire pool. 574eda14cbcSMatt Macy */ 575eda14cbcSMatt Macy fmd_hdl_debug(hdl, "zpool_clear of pool '%s'", 576eda14cbcSMatt Macy zpool_get_name(zhp)); 577eda14cbcSMatt Macy (void) zpool_clear(zhp, NULL, NULL); 578eda14cbcSMatt Macy zpool_close(zhp); 579eda14cbcSMatt Macy continue; 580eda14cbcSMatt Macy } 581eda14cbcSMatt Macy 582eda14cbcSMatt Macy /* 583eda14cbcSMatt Macy * If this is a repair event, then mark the vdev as repaired and 584eda14cbcSMatt Macy * continue. 585eda14cbcSMatt Macy */ 586eda14cbcSMatt Macy if (is_repair) { 587eda14cbcSMatt Macy repair_done = 1; 588eda14cbcSMatt Macy fmd_hdl_debug(hdl, "zpool_clear of pool '%s' vdev %llu", 589eda14cbcSMatt Macy zpool_get_name(zhp), vdev_guid); 590eda14cbcSMatt Macy (void) zpool_vdev_clear(zhp, vdev_guid); 591eda14cbcSMatt Macy zpool_close(zhp); 592eda14cbcSMatt Macy continue; 593eda14cbcSMatt Macy } 594eda14cbcSMatt Macy 595eda14cbcSMatt Macy /* 596eda14cbcSMatt Macy * Actively fault the device if needed. 597eda14cbcSMatt Macy */ 598eda14cbcSMatt Macy if (fault_device) 599eda14cbcSMatt Macy (void) zpool_vdev_fault(zhp, vdev_guid, aux); 600eda14cbcSMatt Macy if (degrade_device) 601eda14cbcSMatt Macy (void) zpool_vdev_degrade(zhp, vdev_guid, aux); 602eda14cbcSMatt Macy 603eda14cbcSMatt Macy if (fault_device || degrade_device) 604eda14cbcSMatt Macy fmd_hdl_debug(hdl, "zpool_vdev_%s: vdev %llu on '%s'", 605eda14cbcSMatt Macy fault_device ? "fault" : "degrade", vdev_guid, 606eda14cbcSMatt Macy zpool_get_name(zhp)); 607eda14cbcSMatt Macy 608eda14cbcSMatt Macy /* 609eda14cbcSMatt Macy * Attempt to substitute a hot spare. 610eda14cbcSMatt Macy */ 611eda14cbcSMatt Macy (void) replace_with_spare(hdl, zhp, vdev); 6127877fdebSMatt Macy 613eda14cbcSMatt Macy zpool_close(zhp); 614eda14cbcSMatt Macy } 615eda14cbcSMatt Macy 616eda14cbcSMatt Macy if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done && 617eda14cbcSMatt Macy nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0) 618eda14cbcSMatt Macy fmd_case_uuresolved(hdl, uuid); 619eda14cbcSMatt Macy } 620eda14cbcSMatt Macy 621eda14cbcSMatt Macy static const fmd_hdl_ops_t fmd_ops = { 622eda14cbcSMatt Macy zfs_retire_recv, /* fmdo_recv */ 623eda14cbcSMatt Macy NULL, /* fmdo_timeout */ 624eda14cbcSMatt Macy NULL, /* fmdo_close */ 625eda14cbcSMatt Macy NULL, /* fmdo_stats */ 626eda14cbcSMatt Macy NULL, /* fmdo_gc */ 627eda14cbcSMatt Macy }; 628eda14cbcSMatt Macy 629eda14cbcSMatt Macy static const fmd_prop_t fmd_props[] = { 630eda14cbcSMatt Macy { "spare_on_remove", FMD_TYPE_BOOL, "true" }, 631eda14cbcSMatt Macy { NULL, 0, NULL } 632eda14cbcSMatt Macy }; 633eda14cbcSMatt Macy 634eda14cbcSMatt Macy static const fmd_hdl_info_t fmd_info = { 635eda14cbcSMatt Macy "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props 636eda14cbcSMatt Macy }; 637eda14cbcSMatt Macy 638eda14cbcSMatt Macy void 639eda14cbcSMatt Macy _zfs_retire_init(fmd_hdl_t *hdl) 640eda14cbcSMatt Macy { 641eda14cbcSMatt Macy zfs_retire_data_t *zdp; 642eda14cbcSMatt Macy libzfs_handle_t *zhdl; 643eda14cbcSMatt Macy 644eda14cbcSMatt Macy if ((zhdl = libzfs_init()) == NULL) 645eda14cbcSMatt Macy return; 646eda14cbcSMatt Macy 647eda14cbcSMatt Macy if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 648eda14cbcSMatt Macy libzfs_fini(zhdl); 649eda14cbcSMatt Macy return; 650eda14cbcSMatt Macy } 651eda14cbcSMatt Macy 652eda14cbcSMatt Macy zdp = fmd_hdl_zalloc(hdl, sizeof (zfs_retire_data_t), FMD_SLEEP); 653eda14cbcSMatt Macy zdp->zrd_hdl = zhdl; 654eda14cbcSMatt Macy 655eda14cbcSMatt Macy fmd_hdl_setspecific(hdl, zdp); 656eda14cbcSMatt Macy } 657eda14cbcSMatt Macy 658eda14cbcSMatt Macy void 659eda14cbcSMatt Macy _zfs_retire_fini(fmd_hdl_t *hdl) 660eda14cbcSMatt Macy { 661eda14cbcSMatt Macy zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl); 662eda14cbcSMatt Macy 663eda14cbcSMatt Macy if (zdp != NULL) { 664eda14cbcSMatt Macy zfs_retire_clear_data(hdl, zdp); 665eda14cbcSMatt Macy libzfs_fini(zdp->zrd_hdl); 666eda14cbcSMatt Macy fmd_hdl_free(hdl, zdp, sizeof (zfs_retire_data_t)); 667eda14cbcSMatt Macy } 668eda14cbcSMatt Macy } 669