/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Disk Monitor
 */

/*
 * NOTE(review): the header names of the following system includes are
 * missing from this copy of the file (only the bare directives remain).
 * They must be restored from the upstream source before this file can be
 * built; they are deliberately left untouched here rather than guessed.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "disk_monitor.h"
#include "hotplug_mgr.h"
#include "schg_mgr.h"
#include "topo_gather.h"
#include "dm_platform.h"

/*
 * Name of this fmd module.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * consumed by macros in the project headers -- confirm.
 */
#define THIS_FMD_MODULE_NAME "disk-monitor"

/*
 * Bit flags recording which managers diskmon_init() has successfully
 * initialized.  The failure path of diskmon_init() tests these bits to
 * decide which managers need to be cleaned up.
 */
static enum disk_init_state {
	INIT_STATE_NONE = 0,		/* nothing initialized yet */
	STATE_CHANGE_MGR_INITTED = 2,	/* state change manager is up */
	HOTPLUG_MGR_INITTED = 4		/* hotplug manager is up */
} g_init_state = INIT_STATE_NONE;

/*
 * NOTE(review): presumably distinguishes suspect lists from repaired lists;
 * this type is not referenced anywhere in the visible part of this file.
 */
typedef enum {
	LT_SUSPECT,
	LT_REPAIRED
} fm_list_type_t;

/*
 * Global verbosity flag -- controls chattiness of debug messages and
 * warnings.  Its value is determined by the fmd property "log-level"
 * settable in the DE's .conf file.
*/ log_class_t g_verbose = 0; cfgdata_t *config_data = NULL; fmd_hdl_t *g_fm_hdl = NULL; static const fmd_prop_t fmd_props[]; static void diskmon_teardown_all(void) { cleanup_hotplug_manager(); cleanup_state_change_manager(config_data); config_fini(); } static int count_disks(diskmon_t *disklistp) { int i = 0; while (disklistp != NULL) { i++; disklistp = disklistp->next; } return (i); } static int diskmon_init(void) { /* * Block the generation of state change events (generated by the * hotplug manager thread) here; they will be unblocked after the * state change manager thread is ready to accept state changes * (shortly after it starts). */ block_state_change_events(); if (dm_platform_init() != 0) goto cleanup; if (init_hotplug_manager() != 0) goto cleanup; else g_init_state |= HOTPLUG_MGR_INITTED; if (init_state_change_manager(config_data) != 0) goto cleanup; else g_init_state |= STATE_CHANGE_MGR_INITTED; return (E_SUCCESS); cleanup: unblock_state_change_events(); /* * The cleanup order here does matter, due to dependencies between the * managers. */ if (g_init_state & HOTPLUG_MGR_INITTED) cleanup_hotplug_manager(); if (g_init_state & STATE_CHANGE_MGR_INITTED) cleanup_state_change_manager(config_data); dm_platform_fini(); return (E_ERROR); } static void dm_fault_execute_actions(fmd_hdl_t *hdl, diskmon_t *diskp, nvlist_t *nvl) { const char *action_prop = NULL; const char *action_string; /* * The predictive failure action is the activation of the fault * indicator. */ if (fmd_nvl_class_match(hdl, nvl, DISK_ERROR_CLASS "." FM_FAULT_DISK_OVERTEMP)) action_prop = DISK_PROP_OTEMPACTION; if (fmd_nvl_class_match(hdl, nvl, DISK_ERROR_CLASS "." 
FM_FAULT_DISK_TESTFAIL)) action_prop = DISK_PROP_STFAILACTION; dm_fault_indicator_set(diskp, INDICATOR_ON); if (action_prop != NULL && (action_string = dm_prop_lookup(diskp->props, action_prop)) != NULL) { if (dm_platform_indicator_execute(action_string) != 0) { log_warn("Fault action `%s' did not successfully " "complete.\n", action_string); } } } static void diskmon_agent_repair(fmd_hdl_t *hdl, nvlist_t *nvl, int repair) { char *uuid = NULL; nvlist_t **nva; uint_t nvc; diskmon_t *diskp; nvlist_t *fmri; nvlist_t *fltnvl; int err = 0; err |= nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid); err |= nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, &nva, &nvc); if (err != 0) return; while (nvc-- != 0) { fltnvl = *nva++; if (nvlist_lookup_nvlist(fltnvl, FM_FAULT_RESOURCE, &fmri) != 0) continue; if ((diskp = dm_fmri_to_diskmon(hdl, fmri)) == NULL) continue; log_msg(MM_MAIN, "Disk %s repaired!\n", diskp->location); dm_fault_indicator_set(diskp, INDICATOR_OFF); dm_state_change(diskp, HPS_REPAIRED); } if (repair) fmd_case_uuresolved(hdl, uuid); } static void diskmon_agent_suspect(fmd_hdl_t *hdl, nvlist_t *nvl) { char *uuid = NULL; nvlist_t **nva; uint_t nvc; diskmon_t *diskp; nvlist_t *fmri; nvlist_t *fltnvl; int err = 0; err |= nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid); err |= nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, &nva, &nvc); if (err != 0) return; while (nvc-- != 0 && !fmd_case_uuclosed(hdl, uuid)) { fltnvl = *nva++; if (nvlist_lookup_nvlist(fltnvl, FM_FAULT_RESOURCE, &fmri) != 0) continue; if ((diskp = dm_fmri_to_diskmon(hdl, fmri)) == NULL) continue; /* Execute the actions associated with this fault */ dm_fault_execute_actions(hdl, diskp, fltnvl); /* * Send a state change event to the state change manager */ dm_state_change(diskp, HPS_FAULTED); } if (!fmd_case_uuclosed(hdl, uuid)) { /* Case is closed */ fmd_case_uuclose(hdl, uuid); } } /*ARGSUSED*/ static void diskmon_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char 
*class) { diskmon_t *diskp; nvlist_t *fmri; if (g_verbose & MM_MAIN) nvlist_print(stderr, nvl); /* * Act on the fault suspect list or repaired list (embedded agent * action). */ if (fmd_nvl_class_match(hdl, nvl, FM_LIST_REPAIRED_CLASS)) { diskmon_agent_repair(hdl, nvl, 1); return; } else if (fmd_nvl_class_match(hdl, nvl, FM_LIST_UPDATED_CLASS)) { diskmon_agent_repair(hdl, nvl, 0); return; } else if (fmd_nvl_class_match(hdl, nvl, FM_LIST_SUSPECT_CLASS)) { diskmon_agent_suspect(hdl, nvl); return; } /* * If we get any replayed faults, set the diskmon's faulted * flag for the appropriate fault, then change the diskmon's state * to faulted. */ if (fmd_nvl_class_match(hdl, nvl, DISK_ERROR_CLASS ".*")) { if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &fmri) != 0) return; if ((diskp = dm_fmri_to_diskmon(hdl, fmri)) == NULL) return; /* Execute the actions associated with this fault */ dm_fault_execute_actions(hdl, diskp, nvl); /* * If the fault wasn't generated by this module, send a * state change event to the state change manager */ dm_state_change(diskp, HPS_FAULTED); return; } } static const fmd_hdl_ops_t fmd_ops = { diskmon_recv, /* fmdo_recv */ NULL, /* fmdo_timeout */ NULL, /* fmdo_close */ NULL, /* fmdo_stats */ NULL, /* fmdo_gc */ }; static const fmd_prop_t fmd_props[] = { { GLOBAL_PROP_LOG_LEVEL, FMD_TYPE_UINT32, "0" }, { NULL, 0, NULL } }; static const fmd_hdl_info_t fmd_info = { "Disk Monitor", DISK_MONITOR_MODULE_VERSION, &fmd_ops, fmd_props }; void _fmd_init(fmd_hdl_t *hdl) { fmd_case_t *cp; int disk_count; g_fm_hdl = hdl; if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { return; } if (config_init()) { log_err("Could not initialize configuration!\n"); fmd_hdl_unregister(hdl); return; } if (config_get(hdl, fmd_props)) { config_fini(); log_err("Could not retrieve configuration from libtopo!\n"); fmd_hdl_unregister(hdl); return; } /* * If there are no disks to monitor, bail out */ if ((disk_count = count_disks(config_data->disk_list)) == 0) { 
config_fini(); fmd_hdl_unregister(hdl); return; } if (diskmon_init() == E_ERROR) { config_fini(); fmd_hdl_unregister(hdl); return; } log_msg(MM_MAIN, "Monitoring %d disks.\n", disk_count); /* * Iterate over all active cases. * Since we automatically solve all cases, these cases must have * had the fault added, but the DE must have been interrupted * before they were solved. */ for (cp = fmd_case_next(hdl, NULL); cp != NULL; cp = fmd_case_next(hdl, cp)) { if (!fmd_case_solved(hdl, cp)) fmd_case_solve(hdl, cp); } } /*ARGSUSED*/ void _fmd_fini(fmd_hdl_t *hdl) { diskmon_teardown_all(); g_fm_hdl = NULL; }