/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2016, Intel Corporation.
 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
 * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
 */

#include <libnvpair.h>
#include <libzfs.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <pthread.h>
#include <unistd.h>

#include "zfs_agents.h"
#include "fmd_api.h"
#include "../zed_log.h"

/*
 * agent dispatch code
 */

static pthread_mutex_t	agent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	agent_cond = PTHREAD_COND_INITIALIZER;
static list_t		agent_events;	/* list of pending events */
static int		agent_exiting;

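/*
 * Each event posted to the agents is queued as an agent_event_t and
 * later consumed, one at a time, by the agent consumer thread.
 */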
typedef struct agent_event {
	char		ae_class[64];
	char		ae_subclass[32];
	nvlist_t	*ae_nvl;
	list_node_t	ae_node;
} agent_event_t;

pthread_t g_agents_tid;		/* agent consumer thread */

libzfs_handle_t *g_zfs_hdl;	/* libzfs handle shared by the agents */

/* guid search data */
typedef enum device_type {
	DEVICE_TYPE_L2ARC,	/* l2arc device */
	DEVICE_TYPE_SPARE,	/* spare device */
	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
} device_type_t;

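/*
 * Search context handed to zpool_iter()/zfs_agent_iter_vdev(): the devid
 * to look for, plus the matching vdev/pool guids and device type that are
 * filled in when a match is found.
 */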
typedef struct guid_search {
	uint64_t	gs_pool_guid;
	uint64_t	gs_vdev_guid;
	char		*gs_devid;
	device_type_t	gs_vdev_type;
	uint64_t	gs_vdev_expandtime;	/* vdev expansion time */
} guid_search_t;

/*
 * Walks the vdev tree recursively looking for a matching devid.
 * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
 */
static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
	guid_search_t *gsp = arg;
	char *path = NULL;
	uint_t c, children;
	nvlist_t **child;

	/*
	 * First iterate over any children.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
				return (B_TRUE);
			}
		}
	}
	/*
	 * Iterate over any spares and cache devices
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
				return (B_TRUE);
			}
		}
	}
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
				return (B_TRUE);
			}
		}
	}
	/*
	 * On a devid match, grab the vdev guid and expansion time, if any.
	 */
	if (gsp->gs_devid != NULL &&
	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
	    (strcmp(gsp->gs_devid, path) == 0)) {
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
		    &gsp->gs_vdev_guid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}

	return (B_FALSE);
}

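/*
 * zpool_iter() callback: search one pool's vdev tree for the devid in
 * the guid_search_t and, on a match, also record the pool guid.
 * Returning non-zero stops the pool iteration.
 */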
static int
zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
{
	guid_search_t *gsp = arg;
	nvlist_t *config, *nvl;

	/*
	 * For each vdev in this pool, look for a match by devid
	 */
	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvl) == 0) {
			(void) zfs_agent_iter_vdev(zhp, nvl, gsp);
		}
	}
	/*
	 * if a match was found then grab the pool guid
	 */
	if (gsp->gs_vdev_guid) {
		(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &gsp->gs_pool_guid);
	}

	zpool_close(zhp);
	return (gsp->gs_vdev_guid != 0);
}

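/*
 * Queue an event for the agent consumer thread. EC_DEV_REMOVE disk events
 * are remapped to 'resource.fs.zfs.removed' (with the pool/vdev guids
 * filled in) so the diagnosis engine can act on hot-unplugged devices.
 */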
void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
	agent_event_t *event;

	if (subclass == NULL)
		subclass = "";

	event = malloc(sizeof (agent_event_t));
	if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
		if (event)
			free(event);
		return;
	}

	if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
		class = EC_ZFS;
		subclass = ESC_ZFS_VDEV_CHECK;
	}

	/*
	 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
	 * from the vdev_disk layer after a hot unplug. Fortunately we do
	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
	 * proxy so we remap it here for the benefit of the diagnosis engine.
	 * Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa
	 * layer. Processing multiple FM_RESOURCE_REMOVED events is not harmful.
	 */
	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
	    (strcmp(subclass, ESC_DISK) == 0) &&
	    (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
	    nvlist_exists(nvl, DEV_IDENTIFIER))) {
		nvlist_t *payload = event->ae_nvl;
		struct timeval tv;
		int64_t tod[2];
		uint64_t pool_guid = 0, vdev_guid = 0;
		guid_search_t search = { 0 };
		device_type_t devtype = DEVICE_TYPE_PRIMARY;

		class = "resource.fs.zfs.removed";
		subclass = "";

		(void) nvlist_add_string(payload, FM_CLASS, class);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

		(void) gettimeofday(&tv, NULL);
		tod[0] = tv.tv_sec;
		tod[1] = tv.tv_usec;
		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

		/*
		 * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
		 * ZFS_EV_POOL_GUID may be missing so find them.
		 */
		if (pool_guid == 0 || vdev_guid == 0) {
			if ((nvlist_lookup_string(nvl, DEV_IDENTIFIER,
			    &search.gs_devid) == 0) &&
			    (zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search)
			    == 1)) {
				if (pool_guid == 0)
					pool_guid = search.gs_pool_guid;
				if (vdev_guid == 0)
					vdev_guid = search.gs_vdev_guid;
				devtype = search.gs_vdev_type;
			}
		}

		/*
		 * We want to avoid reporting "remove" events coming from
		 * libudev for VDEVs which were expanded recently (10s) and
		 * avoid activating spares in response to partitions being
		 * deleted and created in rapid succession.
		 */
		if (search.gs_vdev_expandtime != 0 &&
		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
			    "for recently expanded device '%s'", EC_DEV_REMOVE,
			    search.gs_devid);
			/* the event is never queued, so release it here */
			nvlist_free(event->ae_nvl);
			free(event);
			goto out;
		}

		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
		switch (devtype) {
		case DEVICE_TYPE_L2ARC:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
			    VDEV_TYPE_L2CACHE);
			break;
		case DEVICE_TYPE_SPARE:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
			break;
		case DEVICE_TYPE_PRIMARY:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
			break;
		}

		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
		    EC_DEV_REMOVE, class);
	}

	(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
	(void) strlcpy(event->ae_subclass, subclass,
	    sizeof (event->ae_subclass));

	(void) pthread_mutex_lock(&agent_lock);
	list_insert_tail(&agent_events, event);
	(void) pthread_mutex_unlock(&agent_lock);

out:
	(void) pthread_cond_signal(&agent_cond);
}

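/*
 * Hand an event to every agent that subscribes to its class: the
 * zfs-diagnosis engine, the zfs-retire agent, and the SLM (disk/vdev
 * monitor) module.
 */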
static void
zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
{
	/*
	 * The diagnosis engine subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 * 	/usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
	 */
	if (strstr(class, "ereport.fs.zfs.") != NULL ||
	    strstr(class, "resource.fs.zfs.") != NULL ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
	}

	/*
	 * The retire agent subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 * 	/usr/lib/fm/fmd/plugins/zfs-retire.conf
	 *
	 * NOTE: fault events come directly from our diagnosis engine
	 * and will not pass through the zfs kernel module.
	 */
	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
	    strcmp(class, "resource.fs.zfs.removed") == 0 ||
	    strcmp(class, "resource.fs.zfs.statechange") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
	}

	/*
	 * The SLM module only consumes disk events and vdev check events
	 *
	 * NOTE: disk events come directly from disk monitor and will
	 * not pass through the zfs kernel module.
	 */
	if (strstr(class, "EC_dev_") != NULL ||
	    strcmp(class, EC_ZFS) == 0) {
		(void) zfs_slm_event(class, subclass, nvl);
	}
}

/*
 * Events are consumed and dispatched from this thread.
 * An agent can also post an event, so the event list lock
 * is not held while calling an agent.
 * One event is consumed at a time.
 */
static void *
zfs_agent_consumer_thread(void *arg)
{
	(void) arg;

	for (;;) {
		agent_event_t *event;

		(void) pthread_mutex_lock(&agent_lock);

		/* wait for an event to show up */
		while (!agent_exiting && list_is_empty(&agent_events))
			(void) pthread_cond_wait(&agent_cond, &agent_lock);

		if (agent_exiting) {
			(void) pthread_mutex_unlock(&agent_lock);
			zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
			    "exiting");
			return (NULL);
		}

		if ((event = (list_head(&agent_events))) != NULL) {
			list_remove(&agent_events, event);

			(void) pthread_mutex_unlock(&agent_lock);

			/* dispatch to all event subscribers */
			zfs_agent_dispatch(event->ae_class, event->ae_subclass,
			    event->ae_nvl);

			nvlist_free(event->ae_nvl);
			free(event);
			continue;
		}

		(void) pthread_mutex_unlock(&agent_lock);
	}

	return (NULL);
}

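/*
 * Initialize the agent modules (SLM, zfs-diagnosis, zfs-retire) and
 * start the consumer thread that dispatches queued events to them.
 */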
void
zfs_agent_init(libzfs_handle_t *zfs_hdl)
{
	fmd_hdl_t *hdl;

	g_zfs_hdl = zfs_hdl;

	if (zfs_slm_init() != 0)
		zed_log_die("Failed to initialize zfs slm");
	zed_log_msg(LOG_INFO, "Add Agent: init");

	hdl = fmd_module_hdl("zfs-diagnosis");
	_zfs_diagnosis_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs diagnosis");

	hdl = fmd_module_hdl("zfs-retire");
	_zfs_retire_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs retire");

	list_create(&agent_events, sizeof (agent_event_t),
	    offsetof(struct agent_event, ae_node));

	if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
	    NULL) != 0) {
		list_destroy(&agent_events);
		zed_log_die("Failed to initialize agents");
	}
	pthread_setname_np(g_agents_tid, "agents");
}

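/*
 * Stop the consumer thread, drain any remaining events, and tear down
 * the agent modules.
 */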
void
zfs_agent_fini(void)
{
	fmd_hdl_t *hdl;
	agent_event_t *event;

	agent_exiting = 1;
	(void) pthread_cond_signal(&agent_cond);

	/* wait for the agent consumer thread to exit */
	(void) pthread_join(g_agents_tid, NULL);

	/* drain any pending events */
	while ((event = (list_head(&agent_events))) != NULL) {
		list_remove(&agent_events, event);
		nvlist_free(event->ae_nvl);
		free(event);
	}

	list_destroy(&agent_events);

	if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
		_zfs_retire_fini(hdl);
		fmd_hdl_unregister(hdl);
	}
	if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
		_zfs_diagnosis_fini(hdl);
		fmd_hdl_unregister(hdl);
	}

	zed_log_msg(LOG_INFO, "Add Agent: fini");
	zfs_slm_fini();

	g_zfs_hdl = NULL;
}