xref: /freebsd/cddl/usr.sbin/zfsd/zfsd_event.cc (revision 2b833162)
1 /*-
2  * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions, and the following disclaimer,
10  *    without modification.
11  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12  *    substantially similar to the "NO WARRANTY" disclaimer below
13  *    ("Disclaimer") and any redistribution must be conditioned upon
14  *    including a substantially similar Disclaimer requirement for further
15  *    binary redistribution.
16  *
17  * NO WARRANTY
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGES.
29  *
30  * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31  */
32 
33 /**
34  * \file zfsd_event.cc
35  */
36 #include <sys/cdefs.h>
37 #include <sys/byteorder.h>
38 #include <sys/time.h>
39 #include <sys/fs/zfs.h>
40 #include <sys/vdev_impl.h>
41 
42 #include <syslog.h>
43 
44 #include <libzfs.h>
45 #include <libzutil.h>
46 /*
47  * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with
48  * C++ flush methods
49  */
50 #undef   flush
51 #undef	__init
52 #include <list>
53 #include <map>
54 #include <sstream>
55 #include <string>
56 
57 #include <devdctl/guid.h>
58 #include <devdctl/event.h>
59 #include <devdctl/event_factory.h>
60 #include <devdctl/exception.h>
61 #include <devdctl/consumer.h>
62 
63 #include "callout.h"
64 #include "vdev_iterator.h"
65 #include "zfsd_event.h"
66 #include "case_file.h"
67 #include "vdev.h"
68 #include "zfsd.h"
69 #include "zfsd_exception.h"
70 #include "zpool_list.h"
71 
72 __FBSDID("$FreeBSD$");
73 /*============================ Namespace Control =============================*/
74 using DevdCtl::Event;
75 using DevdCtl::Guid;
76 using DevdCtl::NVPairMap;
77 using std::stringstream;
78 
79 /*=========================== Class Implementations ==========================*/
80 
81 /*-------------------------------- GeomEvent --------------------------------*/
82 
83 //- GeomEvent Static Public Methods -------------------------------------------
84 Event *
85 GeomEvent::Builder(Event::Type type,
86 		   NVPairMap &nvPairs,
87 		   const string &eventString)
88 {
89 	return (new GeomEvent(type, nvPairs, eventString));
90 }
91 
92 //- GeomEvent Virtual Public Methods ------------------------------------------
93 Event *
94 GeomEvent::DeepCopy() const
95 {
96 	return (new GeomEvent(*this));
97 }
98 
99 bool
100 GeomEvent::Process() const
101 {
102 	/*
103 	 * We only use GEOM events to repair damaged pools.  So return early if
104 	 * there are no damaged pools
105 	 */
106 	if (CaseFile::Empty())
107 		return (false);
108 
109 	/*
110 	 * We are only concerned with arrivals and physical path changes,
111 	 * because those can be used to satisfy online and autoreplace
112 	 * operations
113 	 */
114 	if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE")
115 		return (false);
116 
117 	/* Log the event since it is of interest. */
118 	Log(LOG_INFO);
119 
120 	string devPath;
121 	if (!DevPath(devPath))
122 		return (false);
123 
124 	int devFd(open(devPath.c_str(), O_RDONLY));
125 	if (devFd == -1)
126 		return (false);
127 
128 	bool inUse;
129 	bool degraded;
130 	nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded));
131 
132 	string physPath;
133         bool havePhysPath(PhysicalPath(physPath));
134 
135 	string devName;
136 	DevName(devName);
137 	close(devFd);
138 
139 	if (inUse && devLabel != NULL) {
140 		OnlineByLabel(devPath, physPath, devLabel);
141 	} else if (degraded) {
142 		syslog(LOG_INFO, "%s is marked degraded.  Ignoring "
143 		       "as a replace by physical path candidate.\n",
144 		       devName.c_str());
145 	} else if (havePhysPath) {
146 		/*
147 		 * TODO: attempt to resolve events using every casefile
148 		 * that matches this physpath
149 		 */
150 		CaseFile *caseFile(CaseFile::Find(physPath));
151 		if (caseFile != NULL) {
152 			syslog(LOG_INFO,
153 			       "Found CaseFile(%s:%s:%s) - ReEvaluating\n",
154 			       caseFile->PoolGUIDString().c_str(),
155 			       caseFile->VdevGUIDString().c_str(),
156 			       zpool_state_to_name(caseFile->VdevState(),
157 						   VDEV_AUX_NONE));
158 			caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL);
159 		}
160 	}
161 	return (false);
162 }
163 
164 //- GeomEvent Protected Methods -----------------------------------------------
165 GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs,
166 			       const string &eventString)
167  : DevdCtl::GeomEvent(type, nvpairs, eventString)
168 {
169 }
170 
171 GeomEvent::GeomEvent(const GeomEvent &src)
172  : DevdCtl::GeomEvent::GeomEvent(src)
173 {
174 }
175 
176 nvlist_t *
177 GeomEvent::ReadLabel(int devFd, bool &inUse, bool &degraded)
178 {
179 	pool_state_t poolState;
180 	char        *poolName;
181 	boolean_t    b_inuse;
182 	int          nlabels;
183 
184 	inUse    = false;
185 	degraded = false;
186 	poolName = NULL;
187 	if (zpool_in_use(g_zfsHandle, devFd, &poolState,
188 			 &poolName, &b_inuse) == 0) {
189 		nvlist_t *devLabel = NULL;
190 
191 		inUse = b_inuse == B_TRUE;
192 		if (poolName != NULL)
193 			free(poolName);
194 
195 		if (zpool_read_label(devFd, &devLabel, &nlabels) != 0)
196 			return (NULL);
197 		/*
198 		 * If we find a disk with fewer than the maximum number of
199 		 * labels, it might be the whole disk of a partitioned disk
200 		 * where ZFS resides on a partition.  In that case, we should do
201 		 * nothing and wait for the partition to appear.  Or, the disk
202 		 * might be damaged.  In that case, zfsd should do nothing and
203 		 * wait for the sysadmin to decide.
204 		 */
205 		if (nlabels != VDEV_LABELS || devLabel == NULL) {
206 			nvlist_free(devLabel);
207 			return (NULL);
208 		}
209 
210 		try {
211 			Vdev vdev(devLabel);
212 			degraded = vdev.State() != VDEV_STATE_HEALTHY;
213 			return (devLabel);
214 		} catch (ZfsdException &exp) {
215 			string devName = fdevname(devFd);
216 			string devPath = _PATH_DEV + devName;
217 			string context("GeomEvent::ReadLabel: "
218 				     + devPath + ": ");
219 
220 			exp.GetString().insert(0, context);
221 			exp.Log();
222 			nvlist_free(devLabel);
223 		}
224 	}
225 	return (NULL);
226 }
227 
228 bool
229 GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
230 			      nvlist_t *devConfig)
231 {
232 	bool ret = false;
233 	try {
234 		CaseFileList case_list;
235 		/*
236 		 * A device with ZFS label information has been
237 		 * inserted.  If it matches a device for which we
238 		 * have a case, see if we can solve that case.
239 		 */
240 		syslog(LOG_INFO, "Interrogating VDEV label for %s\n",
241 		       devPath.c_str());
242 		Vdev vdev(devConfig);
243 		CaseFile::Find(vdev.PoolGUID(),vdev.GUID(), case_list);
244 		for (CaseFileList::iterator curr = case_list.begin();
245 		    curr != case_list.end(); curr++) {
246 			ret |= (*curr)->ReEvaluate(devPath, physPath, &vdev);
247 		}
248 		return (ret);
249 
250 	} catch (ZfsdException &exp) {
251 		string context("GeomEvent::OnlineByLabel: " + devPath + ": ");
252 
253 		exp.GetString().insert(0, context);
254 		exp.Log();
255 	}
256 	return (ret);
257 }
258 
259 
260 /*--------------------------------- ZfsEvent ---------------------------------*/
261 //- ZfsEvent Static Public Methods ---------------------------------------------
262 DevdCtl::Event *
263 ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs,
264 		  const string &eventString)
265 {
266 	return (new ZfsEvent(type, nvpairs, eventString));
267 }
268 
269 //- ZfsEvent Virtual Public Methods --------------------------------------------
270 Event *
271 ZfsEvent::DeepCopy() const
272 {
273 	return (new ZfsEvent(*this));
274 }
275 
276 bool
277 ZfsEvent::Process() const
278 {
279 	string logstr("");
280 
281 	if (!Contains("class") && !Contains("type")) {
282 		syslog(LOG_ERR,
283 		       "ZfsEvent::Process: Missing class or type data.");
284 		return (false);
285 	}
286 
287 	/* On config syncs, replay any queued events first. */
288 	if (Value("type").find("misc.fs.zfs.config_sync") == 0) {
289 		/*
290 		 * Even if saved events are unconsumed the second time
291 		 * around, drop them.  Any events that still can't be
292 		 * consumed are probably referring to vdevs or pools that
293 		 * no longer exist.
294 		 */
295 		ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true);
296 		CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
297 	}
298 
299 	if (Value("type").find("misc.fs.zfs.") == 0) {
300 		/* Configuration changes, resilver events, etc. */
301 		ProcessPoolEvent();
302 		return (false);
303 	}
304 
305 	if (!Contains("pool_guid") || !Contains("vdev_guid")) {
306 		/* Only currently interested in Vdev related events. */
307 		return (false);
308 	}
309 
310 	CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
311 	if (caseFile != NULL) {
312 		Log(LOG_INFO);
313 		syslog(LOG_INFO, "Evaluating existing case file\n");
314 		caseFile->ReEvaluate(*this);
315 		return (false);
316 	}
317 
318 	/* Skip events that can't be handled. */
319 	Guid poolGUID(PoolGUID());
320 	/* If there are no replicas for a pool, then it's not manageable. */
321 	if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) {
322 		stringstream msg;
323 		msg << "No replicas available for pool "  << poolGUID;
324 		msg << ", ignoring";
325 		Log(LOG_INFO);
326 		syslog(LOG_INFO, "%s", msg.str().c_str());
327 		return (false);
328 	}
329 
330 	/*
331 	 * Create a case file for this vdev, and have it
332 	 * evaluate the event.
333 	 */
334 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
335 	if (zpl.empty()) {
336 		stringstream msg;
337 		int priority = LOG_INFO;
338 		msg << "ZfsEvent::Process: Event for unknown pool ";
339 		msg << poolGUID << " ";
340 		msg << "queued";
341 		Log(LOG_INFO);
342 		syslog(priority, "%s", msg.str().c_str());
343 		return (true);
344 	}
345 
346 	nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID());
347 	if (vdevConfig == NULL) {
348 		stringstream msg;
349 		int priority = LOG_INFO;
350 		msg << "ZfsEvent::Process: Event for unknown vdev ";
351 		msg << VdevGUID() << " ";
352 		msg << "queued";
353 		Log(LOG_INFO);
354 		syslog(priority, "%s", msg.str().c_str());
355 		return (true);
356 	}
357 
358 	Vdev vdev(zpl.front(), vdevConfig);
359 	caseFile = &CaseFile::Create(vdev);
360 	if (caseFile->ReEvaluate(*this) == false) {
361 		stringstream msg;
362 		int priority = LOG_INFO;
363 		msg << "ZfsEvent::Process: Unconsumed event for vdev(";
364 		msg << zpool_get_name(zpl.front()) << ",";
365 		msg << vdev.GUID() << ") ";
366 		msg << "queued";
367 		Log(LOG_INFO);
368 		syslog(priority, "%s", msg.str().c_str());
369 		return (true);
370 	}
371 	return (false);
372 }
373 
374 //- ZfsEvent Protected Methods -------------------------------------------------
375 ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs,
376 			   const string &eventString)
377  : DevdCtl::ZfsEvent(type, nvpairs, eventString)
378 {
379 }
380 
381 ZfsEvent::ZfsEvent(const ZfsEvent &src)
382  : DevdCtl::ZfsEvent(src)
383 {
384 }
385 
386 /*
387  * Sometimes the kernel won't detach a spare when it is no longer needed.  This
388  * can happen for example if a drive is removed, then either the pool is
389  * exported or the machine is powered off, then the drive is reinserted, then
390  * the machine is powered on or the pool is imported.  ZFSD must detach these
391  * spares itself.
392  */
393 void
394 ZfsEvent::CleanupSpares() const
395 {
396 	Guid poolGUID(PoolGUID());
397 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
398 	if (!zpl.empty()) {
399 		zpool_handle_t* hdl;
400 
401 		hdl = zpl.front();
402 		VdevIterator(hdl).Each(TryDetach, (void*)hdl);
403 	}
404 }
405 
406 void
407 ZfsEvent::ProcessPoolEvent() const
408 {
409 	bool degradedDevice(false);
410 
411 	/* The pool is destroyed.  Discard any open cases */
412 	if (Value("type") == "misc.fs.zfs.pool_destroy") {
413 		Log(LOG_INFO);
414 		CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
415 		return;
416 	}
417 
418 	CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
419 	if (caseFile != NULL) {
420 		if (caseFile->VdevState() != VDEV_STATE_UNKNOWN
421 		 && caseFile->VdevState() < VDEV_STATE_HEALTHY)
422 			degradedDevice = true;
423 
424 		Log(LOG_INFO);
425 		caseFile->ReEvaluate(*this);
426 	}
427 	else if (Value("type") == "misc.fs.zfs.resilver_finish")
428 	{
429 		/*
430 		 * It's possible to get a resilver_finish event with no
431 		 * corresponding casefile.  For example, if a damaged pool were
432 		 * exported, repaired, then reimported.
433 		 */
434 		Log(LOG_INFO);
435 		CleanupSpares();
436 	}
437 
438 	if (Value("type") == "misc.fs.zfs.vdev_remove"
439 	 && degradedDevice == false) {
440 
441 		/* See if any other cases can make use of this device. */
442 		Log(LOG_INFO);
443 		ZfsDaemon::RequestSystemRescan();
444 	}
445 }
446 
447 bool
448 ZfsEvent::TryDetach(Vdev &vdev, void *cbArg)
449 {
450 	/*
451 	 * Outline:
452 	 * if this device is a spare, and its parent includes one healthy,
453 	 * non-spare child, then detach this device.
454 	 */
455 	zpool_handle_t *hdl(static_cast<zpool_handle_t*>(cbArg));
456 
457 	if (vdev.IsSpare()) {
458 		std::list<Vdev> siblings;
459 		std::list<Vdev>::iterator siblings_it;
460 		boolean_t cleanup = B_FALSE;
461 
462 		Vdev parent = vdev.Parent();
463 		siblings = parent.Children();
464 
465 		/* Determine whether the parent should be cleaned up */
466 		for (siblings_it = siblings.begin();
467 		     siblings_it != siblings.end();
468 		     siblings_it++) {
469 			Vdev sibling = *siblings_it;
470 
471 			if (!sibling.IsSpare() &&
472 			     sibling.State() == VDEV_STATE_HEALTHY) {
473 				cleanup = B_TRUE;
474 				break;
475 			}
476 		}
477 
478 		if (cleanup) {
479 			syslog(LOG_INFO, "Detaching spare vdev %s from pool %s",
480 			       vdev.Path().c_str(), zpool_get_name(hdl));
481 			zpool_vdev_detach(hdl, vdev.Path().c_str());
482 		}
483 
484 	}
485 
486 	/* Always return false, because there may be other spares to detach */
487 	return (false);
488 }
489