1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24  * Copyright (c) 2017, Intel Corporation.
25  * Copyright (c) 2023-2024, Klara Inc.
26  */
27 
28 /*
29  * ZFS Fault Injector
30  *
31  * This userland component takes a set of options and uses libzpool to translate
32  * from a user-visible object type and name to an internal representation.
33  * There are two basic types of faults: device faults and data faults.
34  *
35  *
36  * DEVICE FAULTS
37  *
38  * Errors can be injected into a particular vdev using the '-d' option.  This
39  * option takes a path or vdev GUID to uniquely identify the device within a
 * pool.  There are four types of errors that can be injected: EIO, ENXIO,
 * ECHILD, and EILSEQ.  These can be controlled through the '-e' option and the
 * default is ENXIO.  For EIO failures, any attempt to read data from the device
 * will return EIO, but a subsequent attempt to reopen the device will succeed.
 * For ENXIO failures, any attempt to read from the device will return EIO, and
 * any attempt to reopen the device will return ENXIO.  The EILSEQ failures
46  * only apply to read operations (-T read) and will flip a bit after the device
47  * has read the original data.
48  *
49  * For label faults, the -L option must be specified. This allows faults
50  * to be injected into either the nvlist, uberblock, pad1, or pad2 region
51  * of all the labels for the specified device.
52  *
53  * This form of the command looks like:
54  *
55  * 	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
56  *
57  *
58  * DATA FAULTS
59  *
60  * We begin with a tuple of the form:
61  *
62  * 	<type,level,range,object>
63  *
64  * 	type	A string describing the type of data to target.  Each type
65  * 		implicitly describes how to interpret 'object'. Currently,
66  * 		the following values are supported:
67  *
68  * 		data		User data for a file
69  * 		dnode		Dnode for a file or directory
70  *
71  *		The following MOS objects are special.  Instead of injecting
72  *		errors on a particular object or blkid, we inject errors across
73  *		all objects of the given type.
74  *
75  * 		mos		Any data in the MOS
76  * 		mosdir		object directory
77  * 		config		pool configuration
78  * 		bpobj		blkptr list
79  * 		spacemap	spacemap
80  * 		metaslab	metaslab
81  * 		errlog		persistent error log
82  *
83  * 	level	Object level.  Defaults to '0', not applicable to all types.  If
84  * 		a range is given, this corresponds to the indirect block
85  * 		corresponding to the specific range.
86  *
87  *	range	A numerical range [start,end) within the object.  Defaults to
88  *		the full size of the file.
89  *
90  * 	object	A string describing the logical location of the object.  For
91  * 		files and directories (currently the only supported types),
92  * 		this is the path of the object on disk.
93  *
94  * This is translated, via libzpool, into the following internal representation:
95  *
96  * 	<type,objset,object,level,range>
97  *
98  * These types should be self-explanatory.  This tuple is then passed to the
99  * kernel via a special ioctl() to initiate fault injection for the given
100  * object.  Note that 'type' is not strictly necessary for fault injection, but
101  * is used when translating existing faults into a human-readable string.
102  *
103  *
104  * The command itself takes one of the forms:
105  *
106  * 	zinject
107  * 	zinject <-a | -u pool>
108  * 	zinject -c <id|all>
109  * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
110  *	    [-r range] <object>
111  * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
112  *
113  * With no arguments, the command prints all currently registered injection
114  * handlers, with their numeric identifiers.
115  *
116  * The '-c' option will clear the given handler, or all handlers if 'all' is
117  * specified.
118  *
119  * The '-e' option takes a string describing the errno to simulate.  This must
120  * be one of 'io', 'checksum', 'decompress', or 'decrypt'.  In most cases this
121  * will result in the same behavior, but RAID-Z will produce a different set of
122  * ereports for this situation.
123  *
124  * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
125  * specified, then the ARC cache is flushed appropriately.  If '-u' is
126  * specified, then the underlying SPA is unloaded.  Either of these flags can be
127  * specified independently of any other handlers.  The '-m' flag automatically
128  * does an unmount and remount of the underlying dataset to aid in flushing the
129  * cache.
130  *
131  * The '-f' flag controls the frequency of errors injected, expressed as a
132  * real number percentage between 0.0001 and 100.  The default is 100.
133  *
 * The '-t' form is responsible for actually injecting the handler into the
135  * framework.  It takes the arguments described above, translates them to the
136  * internal tuple using libzpool, and then issues an ioctl() to register the
137  * handler.
138  *
139  * The final form can target a specific bookmark, regardless of whether a
140  * human-readable interface has been designed.  It allows developers to specify
141  * a particular block by number.
142  */
143 
144 #include <errno.h>
145 #include <fcntl.h>
146 #include <stdio.h>
147 #include <stdlib.h>
148 #include <string.h>
149 #include <strings.h>
150 #include <unistd.h>
151 
152 #include <sys/fs/zfs.h>
153 #include <sys/mount.h>
154 
155 #include <libzfs.h>
156 
157 #undef verify	/* both libzfs.h and zfs_context.h want to define this */
158 
159 #include "zinject.h"
160 
/* libzfs library handle; set from libzfs_init() at the start of main(). */
libzfs_handle_t *g_zfs;
/* File descriptor for ZFS_DEV, opened O_RDWR at the start of main(). */
int zfs_fd;
163 
/*
 * User-visible names of the injection types, searched by name_to_type().
 * The index of a name in this table is the value name_to_type() returns
 * for it.
 * NOTE(review): the ordering presumably mirrors the err_type_t enumeration
 * declared in zinject.h -- confirm before adding or reordering entries.
 */
static const char *const errtable[TYPE_INVAL] = {
	"data",
	"dnode",
	"mos",
	"mosdir",
	"metaslab",
	"config",
	"bpobj",
	"spacemap",
	"errlog",
	"uber",
	"nvlist",
	"pad1",
	"pad2"
};
179 
180 static err_type_t
name_to_type(const char * arg)181 name_to_type(const char *arg)
182 {
183 	int i;
184 	for (i = 0; i < TYPE_INVAL; i++)
185 		if (strcmp(errtable[i], arg) == 0)
186 			return (i);
187 
188 	return (TYPE_INVAL);
189 }
190 
191 static const char *
type_to_name(uint64_t type)192 type_to_name(uint64_t type)
193 {
194 	switch (type) {
195 	case DMU_OT_OBJECT_DIRECTORY:
196 		return ("mosdir");
197 	case DMU_OT_OBJECT_ARRAY:
198 		return ("metaslab");
199 	case DMU_OT_PACKED_NVLIST:
200 		return ("config");
201 	case DMU_OT_BPOBJ:
202 		return ("bpobj");
203 	case DMU_OT_SPACE_MAP:
204 		return ("spacemap");
205 	case DMU_OT_ERROR_LOG:
206 		return ("errlog");
207 	default:
208 		return ("-");
209 	}
210 }
211 
/* One entry of the errno <-> error-name mapping. */
struct errstr {
	int		err;	/* errno value */
	const char	*str;	/* name accepted on the command line */
};
/*
 * Mapping between errno values and their zinject names, used by
 * str_to_err() and err_to_str().  Note that "decompress" and "decrypt" are
 * represented by EINVAL and EACCES respectively.  The table is terminated
 * by an entry whose 'str' is NULL.
 */
static const struct errstr errstrtable[] = {
	{ EIO,		"io" },
	{ ECKSUM,	"checksum" },
	{ EINVAL,	"decompress" },
	{ EACCES,	"decrypt" },
	{ ENXIO,	"nxio" },
	{ ECHILD,	"dtl" },
	{ EILSEQ,	"corrupt" },
	{ ENOSYS,	"noop" },
	{ 0, NULL },
};
227 
228 static int
str_to_err(const char * str)229 str_to_err(const char *str)
230 {
231 	for (int i = 0; errstrtable[i].str != NULL; i++)
232 		if (strcasecmp(errstrtable[i].str, str) == 0)
233 			return (errstrtable[i].err);
234 	return (-1);
235 }
236 static const char *
err_to_str(int err)237 err_to_str(int err)
238 {
239 	for (int i = 0; errstrtable[i].str != NULL; i++)
240 		if (errstrtable[i].err == err)
241 			return (errstrtable[i].str);
242 	return ("[unknown]");
243 }
244 
/*
 * Print the usage message to stdout.
 *
 * The text is a single string literal; every help line ends in '\n' with
 * no trailing blanks, and continuation lines start with tabs only.
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only\n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or\n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t\t[-T <read|write|free|claim|flush|all>] [-f frequency] pool\n\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl',\n"
	    "\t\t'corrupt' (bit flip), or 'noop' (successfully do nothing).\n"
	    "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
	    "\t\tdevice error injection to a percentage of the IOs.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
	    "\t\tPerform a specific action on a particular device.\n"
	    "\n"
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n"
	    "\tzinject -P import|export -s <seconds> pool\n"
	    "\t\tAdd an artificial delay to a future pool import or export,\n"
	    "\t\tsuch that the operation takes a minimum of supplied seconds\n"
	    "\t\tto complete.\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n"
	    "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be one of 'io',\n"
	    "\t\t\t'checksum', 'decompress', or 'decrypt'.  Default is 'io'.\n"
	    "\t\t-C\tInject the given error only into specific DVAs. The\n"
	    "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n"
	    "\t\t\tseparated by commas (ex. '0,2').\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 0.0001 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
372 
373 static int
iter_handlers(int (* func)(int,const char *,zinject_record_t *,void *),void * data)374 iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
375     void *data)
376 {
377 	zfs_cmd_t zc = {"\0"};
378 	int ret;
379 
380 	while (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
381 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
382 		    &zc.zc_inject_record, data)) != 0)
383 			return (ret);
384 
385 	if (errno != ENOENT) {
386 		(void) fprintf(stderr, "Unable to list handlers: %s\n",
387 		    strerror(errno));
388 		return (-1);
389 	}
390 
391 	return (0);
392 }
393 
394 static int
print_data_handler(int id,const char * pool,zinject_record_t * record,void * data)395 print_data_handler(int id, const char *pool, zinject_record_t *record,
396     void *data)
397 {
398 	int *count = data;
399 
400 	if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
401 	    record->zi_duration != 0) {
402 		return (0);
403 	}
404 
405 	if (*count == 0) {
406 		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-4s  "
407 		    "%-15s\n", "ID", "POOL", "OBJSET", "OBJECT", "TYPE",
408 		    "LVL", "DVAs", "RANGE");
409 		(void) printf("---  ---------------  ------  "
410 		    "------  --------  ---  ----  ---------------\n");
411 	}
412 
413 	*count += 1;
414 
415 	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %-3d  0x%02x  ",
416 	    id, pool, (u_longlong_t)record->zi_objset,
417 	    (u_longlong_t)record->zi_object, type_to_name(record->zi_type),
418 	    record->zi_level, record->zi_dvas);
419 
420 
421 	if (record->zi_start == 0 &&
422 	    record->zi_end == -1ULL)
423 		(void) printf("all\n");
424 	else
425 		(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
426 		    (u_longlong_t)record->zi_end);
427 
428 	return (0);
429 }
430 
431 static int
print_device_handler(int id,const char * pool,zinject_record_t * record,void * data)432 print_device_handler(int id, const char *pool, zinject_record_t *record,
433     void *data)
434 {
435 	static const char *iotypestr[] = {
436 	    "null", "read", "write", "free", "claim", "flush", "trim", "all",
437 	};
438 
439 	int *count = data;
440 
441 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
442 		return (0);
443 
444 	if (record->zi_cmd == ZINJECT_DELAY_IO)
445 		return (0);
446 
447 	if (*count == 0) {
448 		(void) printf("%3s  %-15s  %-16s  %-5s  %-10s  %-9s\n",
449 		    "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ");
450 		(void) printf(
451 		    "---  ---------------  ----------------  "
452 		    "-----  ----------  ---------\n");
453 	}
454 
455 	*count += 1;
456 
457 	double freq = record->zi_freq == 0 ? 100.0f :
458 	    (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
459 
460 	(void) printf("%3d  %-15s  %llx  %-5s  %-10s  %8.4f%%\n", id, pool,
461 	    (u_longlong_t)record->zi_guid, iotypestr[record->zi_iotype],
462 	    err_to_str(record->zi_error), freq);
463 
464 	return (0);
465 }
466 
467 static int
print_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)468 print_delay_handler(int id, const char *pool, zinject_record_t *record,
469     void *data)
470 {
471 	int *count = data;
472 
473 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
474 		return (0);
475 
476 	if (record->zi_cmd != ZINJECT_DELAY_IO)
477 		return (0);
478 
479 	if (*count == 0) {
480 		(void) printf("%3s  %-15s  %-15s  %-15s  %s\n",
481 		    "ID", "POOL", "DELAY (ms)", "LANES", "GUID");
482 		(void) printf("---  ---------------  ---------------  "
483 		    "---------------  ----------------\n");
484 	}
485 
486 	*count += 1;
487 
488 	(void) printf("%3d  %-15s  %-15llu  %-15llu  %llx\n", id, pool,
489 	    (u_longlong_t)NSEC2MSEC(record->zi_timer),
490 	    (u_longlong_t)record->zi_nlanes,
491 	    (u_longlong_t)record->zi_guid);
492 
493 	return (0);
494 }
495 
496 static int
print_panic_handler(int id,const char * pool,zinject_record_t * record,void * data)497 print_panic_handler(int id, const char *pool, zinject_record_t *record,
498     void *data)
499 {
500 	int *count = data;
501 
502 	if (record->zi_func[0] == '\0')
503 		return (0);
504 
505 	if (*count == 0) {
506 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
507 		(void) printf("---  ---------------  ----------------\n");
508 	}
509 
510 	*count += 1;
511 
512 	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
513 
514 	return (0);
515 }
516 
517 static int
print_pool_delay_handler(int id,const char * pool,zinject_record_t * record,void * data)518 print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
519     void *data)
520 {
521 	int *count = data;
522 
523 	if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
524 	    record->zi_cmd != ZINJECT_DELAY_EXPORT) {
525 		return (0);
526 	}
527 
528 	if (*count == 0) {
529 		(void) printf("%3s  %-19s  %-11s  %s\n",
530 		    "ID", "POOL", "DELAY (sec)", "COMMAND");
531 		(void) printf("---  -------------------  -----------"
532 		    "  -------\n");
533 	}
534 
535 	*count += 1;
536 
537 	(void) printf("%3d  %-19s  %-11llu  %s\n",
538 	    id, pool, (u_longlong_t)record->zi_duration,
539 	    record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export");
540 
541 	return (0);
542 }
543 
544 /*
545  * Print all registered error handlers.  Returns the number of handlers
546  * registered.
547  */
548 static int
print_all_handlers(void)549 print_all_handlers(void)
550 {
551 	int count = 0, total = 0;
552 
553 	(void) iter_handlers(print_device_handler, &count);
554 	if (count > 0) {
555 		total += count;
556 		(void) printf("\n");
557 		count = 0;
558 	}
559 
560 	(void) iter_handlers(print_delay_handler, &count);
561 	if (count > 0) {
562 		total += count;
563 		(void) printf("\n");
564 		count = 0;
565 	}
566 
567 	(void) iter_handlers(print_data_handler, &count);
568 	if (count > 0) {
569 		total += count;
570 		(void) printf("\n");
571 		count = 0;
572 	}
573 
574 	(void) iter_handlers(print_pool_delay_handler, &count);
575 	if (count > 0) {
576 		total += count;
577 		(void) printf("\n");
578 		count = 0;
579 	}
580 
581 	(void) iter_handlers(print_panic_handler, &count);
582 
583 	return (count + total);
584 }
585 
586 static int
cancel_one_handler(int id,const char * pool,zinject_record_t * record,void * data)587 cancel_one_handler(int id, const char *pool, zinject_record_t *record,
588     void *data)
589 {
590 	(void) pool, (void) record, (void) data;
591 	zfs_cmd_t zc = {"\0"};
592 
593 	zc.zc_guid = (uint64_t)id;
594 
595 	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
596 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
597 		    id, strerror(errno));
598 		return (1);
599 	}
600 
601 	return (0);
602 }
603 
604 /*
605  * Remove all fault injection handlers.
606  */
607 static int
cancel_all_handlers(void)608 cancel_all_handlers(void)
609 {
610 	int ret = iter_handlers(cancel_one_handler, NULL);
611 
612 	if (ret == 0)
613 		(void) printf("removed all registered handlers\n");
614 
615 	return (ret);
616 }
617 
618 /*
619  * Remove a specific fault injection handler.
620  */
621 static int
cancel_handler(int id)622 cancel_handler(int id)
623 {
624 	zfs_cmd_t zc = {"\0"};
625 
626 	zc.zc_guid = (uint64_t)id;
627 
628 	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
629 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
630 		    id, strerror(errno));
631 		return (1);
632 	}
633 
634 	(void) printf("removed handler %d\n", id);
635 
636 	return (0);
637 }
638 
639 /*
640  * Register a new fault injection handler.
641  */
642 static int
register_handler(const char * pool,int flags,zinject_record_t * record,int quiet)643 register_handler(const char *pool, int flags, zinject_record_t *record,
644     int quiet)
645 {
646 	zfs_cmd_t zc = {"\0"};
647 
648 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
649 	zc.zc_inject_record = *record;
650 	zc.zc_guid = flags;
651 
652 	if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
653 		const char *errmsg = strerror(errno);
654 
655 		switch (errno) {
656 		case EDOM:
657 			errmsg = "block level exceeds max level of object";
658 			break;
659 		case EEXIST:
660 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
661 				errmsg = "pool already imported";
662 			if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
663 				errmsg = "a handler already exists";
664 			break;
665 		case ENOENT:
666 			/* import delay injector running on older zfs module */
667 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
668 				errmsg = "import delay injector not supported";
669 			break;
670 		default:
671 			break;
672 		}
673 		(void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
674 		return (1);
675 	}
676 
677 	if (flags & ZINJECT_NULL)
678 		return (0);
679 
680 	if (quiet) {
681 		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
682 	} else {
683 		(void) printf("Added handler %llu with the following "
684 		    "properties:\n", (u_longlong_t)zc.zc_guid);
685 		(void) printf("  pool: %s\n", pool);
686 		if (record->zi_guid) {
687 			(void) printf("  vdev: %llx\n",
688 			    (u_longlong_t)record->zi_guid);
689 		} else if (record->zi_func[0] != '\0') {
690 			(void) printf("  panic function: %s\n",
691 			    record->zi_func);
692 		} else if (record->zi_duration > 0) {
693 			(void) printf(" time: %lld seconds\n",
694 			    (u_longlong_t)record->zi_duration);
695 		} else if (record->zi_duration < 0) {
696 			(void) printf(" txgs: %lld \n",
697 			    (u_longlong_t)-record->zi_duration);
698 		} else if (record->zi_timer > 0) {
699 			(void) printf(" timer: %lld ms\n",
700 			    (u_longlong_t)NSEC2MSEC(record->zi_timer));
701 		} else {
702 			(void) printf("objset: %llu\n",
703 			    (u_longlong_t)record->zi_objset);
704 			(void) printf("object: %llu\n",
705 			    (u_longlong_t)record->zi_object);
706 			(void) printf("  type: %llu\n",
707 			    (u_longlong_t)record->zi_type);
708 			(void) printf(" level: %d\n", record->zi_level);
709 			if (record->zi_start == 0 &&
710 			    record->zi_end == -1ULL)
711 				(void) printf(" range: all\n");
712 			else
713 				(void) printf(" range: [%llu, %llu)\n",
714 				    (u_longlong_t)record->zi_start,
715 				    (u_longlong_t)record->zi_end);
716 			(void) printf("  dvas: 0x%x\n", record->zi_dvas);
717 		}
718 	}
719 
720 	return (0);
721 }
722 
723 static int
perform_action(const char * pool,zinject_record_t * record,int cmd)724 perform_action(const char *pool, zinject_record_t *record, int cmd)
725 {
726 	zfs_cmd_t zc = {"\0"};
727 
728 	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
729 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
730 	zc.zc_guid = record->zi_guid;
731 	zc.zc_cookie = cmd;
732 
733 	if (zfs_ioctl(g_zfs, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
734 		return (0);
735 
736 	return (1);
737 }
738 
/*
 * Parse an I/O delay specification of the form "<milliseconds>:<lanes>".
 *
 * Both fields must be unsigned decimal numbers and the string must end
 * right after the lane count.  Negative numbers, values that overflow
 * unsigned long, and trailing characters are rejected.
 *
 * On success *delay receives the delay converted to nanoseconds, *nlanes
 * receives the lane count, and 0 is returned.  On any parse error, or if
 * the delay is zero, 1 is returned and the outputs are left untouched.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	unsigned long scan_delay;
	unsigned long scan_nlanes;
	char *end;

	/*
	 * strtoul() accepts a leading '-' and silently negates the result;
	 * neither a delay nor a lane count can be negative.
	 */
	if (strchr(str, '-') != NULL)
		return (1);

	errno = 0;
	scan_delay = strtoul(str, &end, 10);
	if (end == str || *end != ':' || errno == ERANGE)
		return (1);

	str = end + 1;
	scan_nlanes = strtoul(str, &end, 10);
	if (end == str || *end != '\0' || errno == ERANGE)
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (scan_delay == 0)
		return (1);

	/*
	 * The units for the CLI delay parameter is milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Thus we scale the milliseconds to nanoseconds here, and this
	 * nanosecond value is used to pass the delay to the kernel.
	 */
	*delay = MSEC2NSEC(scan_delay);
	*nlanes = scan_nlanes;

	return (0);
}
767 
768 static int
parse_frequency(const char * str,uint32_t * percent)769 parse_frequency(const char *str, uint32_t *percent)
770 {
771 	double val;
772 	char *post;
773 
774 	val = strtod(str, &post);
775 	if (post == NULL || *post != '\0')
776 		return (EINVAL);
777 
778 	/* valid range is [0.0001, 100.0] */
779 	val /= 100.0f;
780 	if (val < 0.000001f || val > 1.0f)
781 		return (ERANGE);
782 
783 	/* convert to an integer for use by kernel */
784 	*percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
785 
786 	return (0);
787 }
788 
/*
 * This function converts a string specifier for DVAs into a bit mask.
 * The dva's provided by the user should be 0 indexed and separated by
 * a comma. For example:
 *	"1"	-> 0b0010  (0x2)
 *	"0,1"	-> 0b0011  (0x3)
 *	"0,1,2"	-> 0b0111  (0x7)
 *
 * Only the DVA indices 0, 1 and 2 are accepted, each at most once.  The
 * list must be a strict alternation of index and comma that starts and
 * ends with an index, so leading, trailing and doubled commas are
 * rejected.
 *
 * Returns 0 and stores the mask in *dvas_out on success.  Returns EINVAL
 * for any malformed list, leaving *dvas_out untouched.
 */
static int
parse_dvas(const char *str, uint32_t *dvas_out)
{
	const size_t len = strlen(str);
	uint32_t mask = 0;
	/* Starting as if a comma was just seen lets an index come first. */
	char prev = ',';

	/* max string length is 5 ("0,1,2") */
	if (len == 0 || len > 5)
		return (EINVAL);

	for (const char *c = str; *c != '\0'; c++) {
		switch (*c) {
		case '0':
		case '1':
		case '2': {
			const uint32_t bit = 1U << (*c - '0');

			/* two DVAs must be separated by a comma */
			if (prev != ',')
				return (EINVAL);

			/* check if this DVA has been set already */
			if (mask & bit)
				return (EINVAL);

			mask |= bit;
			break;
		}
		case ',':
			/* a comma must follow a DVA: none leading or doubled */
			if (prev == ',')
				return (EINVAL);
			break;
		default:
			/* check for invalid character */
			return (EINVAL);
		}
		prev = *c;
	}

	/* check for dangling delimiter */
	if (prev == ',')
		return (EINVAL);

	*dvas_out = mask;
	return (0);
}
841 
842 int
main(int argc,char ** argv)843 main(int argc, char **argv)
844 {
845 	int c;
846 	char *range = NULL;
847 	char *cancel = NULL;
848 	char *end;
849 	char *raw = NULL;
850 	char *device = NULL;
851 	int level = 0;
852 	int quiet = 0;
853 	int error = 0;
854 	int domount = 0;
855 	int io_type = ZIO_TYPES;
856 	int action = VDEV_STATE_UNKNOWN;
857 	err_type_t type = TYPE_INVAL;
858 	err_type_t label = TYPE_INVAL;
859 	zinject_record_t record = { 0 };
860 	char pool[MAXNAMELEN] = "";
861 	char dataset[MAXNAMELEN] = "";
862 	zfs_handle_t *zhp = NULL;
863 	int nowrites = 0;
864 	int dur_txg = 0;
865 	int dur_secs = 0;
866 	int ret;
867 	int flags = 0;
868 	uint32_t dvas = 0;
869 
870 	if ((g_zfs = libzfs_init()) == NULL) {
871 		(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
872 		return (1);
873 	}
874 
875 	libzfs_print_on_error(g_zfs, B_TRUE);
876 
877 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
878 		(void) fprintf(stderr, "failed to open ZFS device\n");
879 		libzfs_fini(g_zfs);
880 		return (1);
881 	}
882 
883 	if (argc == 1) {
884 		/*
885 		 * No arguments.  Print the available handlers.  If there are no
886 		 * available handlers, direct the user to '-h' for help
887 		 * information.
888 		 */
889 		if (print_all_handlers() == 0) {
890 			(void) printf("No handlers registered.\n");
891 			(void) printf("Run 'zinject -h' for usage "
892 			    "information.\n");
893 		}
894 		libzfs_fini(g_zfs);
895 		return (0);
896 	}
897 
898 	while ((c = getopt(argc, argv,
899 	    ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
900 		switch (c) {
901 		case 'a':
902 			flags |= ZINJECT_FLUSH_ARC;
903 			break;
904 		case 'A':
905 			if (strcasecmp(optarg, "degrade") == 0) {
906 				action = VDEV_STATE_DEGRADED;
907 			} else if (strcasecmp(optarg, "fault") == 0) {
908 				action = VDEV_STATE_FAULTED;
909 			} else {
910 				(void) fprintf(stderr, "invalid action '%s': "
911 				    "must be 'degrade' or 'fault'\n", optarg);
912 				usage();
913 				libzfs_fini(g_zfs);
914 				return (1);
915 			}
916 			break;
917 		case 'b':
918 			raw = optarg;
919 			break;
920 		case 'c':
921 			cancel = optarg;
922 			break;
923 		case 'C':
924 			ret = parse_dvas(optarg, &dvas);
925 			if (ret != 0) {
926 				(void) fprintf(stderr, "invalid DVA list '%s': "
927 				    "DVAs should be 0 indexed and separated by "
928 				    "commas.\n", optarg);
929 				usage();
930 				libzfs_fini(g_zfs);
931 				return (1);
932 			}
933 			break;
934 		case 'd':
935 			device = optarg;
936 			break;
937 		case 'D':
938 			errno = 0;
939 			ret = parse_delay(optarg, &record.zi_timer,
940 			    &record.zi_nlanes);
941 			if (ret != 0) {
942 
943 				(void) fprintf(stderr, "invalid i/o delay "
944 				    "value: '%s'\n", optarg);
945 				usage();
946 				libzfs_fini(g_zfs);
947 				return (1);
948 			}
949 			break;
950 		case 'e':
951 			error = str_to_err(optarg);
952 			if (error < 0) {
953 				(void) fprintf(stderr, "invalid error type "
954 				    "'%s': must be one of: io decompress "
955 				    "decrypt nxio dtl corrupt noop\n",
956 				    optarg);
957 				usage();
958 				libzfs_fini(g_zfs);
959 				return (1);
960 			}
961 			break;
962 		case 'f':
963 			ret = parse_frequency(optarg, &record.zi_freq);
964 			if (ret != 0) {
965 				(void) fprintf(stderr, "%sfrequency value must "
966 				    "be in the range [0.0001, 100.0]\n",
967 				    ret == EINVAL ? "invalid value: " :
968 				    ret == ERANGE ? "out of range: " : "");
969 				libzfs_fini(g_zfs);
970 				return (1);
971 			}
972 			break;
973 		case 'F':
974 			record.zi_failfast = B_TRUE;
975 			break;
976 		case 'g':
977 			dur_txg = 1;
978 			record.zi_duration = (int)strtol(optarg, &end, 10);
979 			if (record.zi_duration <= 0 || *end != '\0') {
980 				(void) fprintf(stderr, "invalid duration '%s': "
981 				    "must be a positive integer\n", optarg);
982 				usage();
983 				libzfs_fini(g_zfs);
984 				return (1);
985 			}
986 			/* store duration of txgs as its negative */
987 			record.zi_duration *= -1;
988 			break;
989 		case 'h':
990 			usage();
991 			libzfs_fini(g_zfs);
992 			return (0);
993 		case 'I':
994 			/* default duration, if one hasn't yet been defined */
995 			nowrites = 1;
996 			if (dur_secs == 0 && dur_txg == 0)
997 				record.zi_duration = 30;
998 			break;
999 		case 'l':
1000 			level = (int)strtol(optarg, &end, 10);
1001 			if (*end != '\0') {
1002 				(void) fprintf(stderr, "invalid level '%s': "
1003 				    "must be an integer\n", optarg);
1004 				usage();
1005 				libzfs_fini(g_zfs);
1006 				return (1);
1007 			}
1008 			break;
1009 		case 'm':
1010 			domount = 1;
1011 			break;
1012 		case 'p':
1013 			(void) strlcpy(record.zi_func, optarg,
1014 			    sizeof (record.zi_func));
1015 			record.zi_cmd = ZINJECT_PANIC;
1016 			break;
1017 		case 'P':
1018 			if (strcasecmp(optarg, "import") == 0) {
1019 				record.zi_cmd = ZINJECT_DELAY_IMPORT;
1020 			} else if (strcasecmp(optarg, "export") == 0) {
1021 				record.zi_cmd = ZINJECT_DELAY_EXPORT;
1022 			} else {
1023 				(void) fprintf(stderr, "invalid command '%s': "
1024 				    "must be 'import' or 'export'\n", optarg);
1025 				usage();
1026 				libzfs_fini(g_zfs);
1027 				return (1);
1028 			}
1029 			break;
1030 		case 'q':
1031 			quiet = 1;
1032 			break;
1033 		case 'r':
1034 			range = optarg;
1035 			flags |= ZINJECT_CALC_RANGE;
1036 			break;
1037 		case 's':
1038 			dur_secs = 1;
1039 			record.zi_duration = (int)strtol(optarg, &end, 10);
1040 			if (record.zi_duration <= 0 || *end != '\0') {
1041 				(void) fprintf(stderr, "invalid duration '%s': "
1042 				    "must be a positive integer\n", optarg);
1043 				usage();
1044 				libzfs_fini(g_zfs);
1045 				return (1);
1046 			}
1047 			break;
1048 		case 'T':
1049 			if (strcasecmp(optarg, "read") == 0) {
1050 				io_type = ZIO_TYPE_READ;
1051 			} else if (strcasecmp(optarg, "write") == 0) {
1052 				io_type = ZIO_TYPE_WRITE;
1053 			} else if (strcasecmp(optarg, "free") == 0) {
1054 				io_type = ZIO_TYPE_FREE;
1055 			} else if (strcasecmp(optarg, "claim") == 0) {
1056 				io_type = ZIO_TYPE_CLAIM;
1057 			} else if (strcasecmp(optarg, "flush") == 0) {
1058 				io_type = ZIO_TYPE_FLUSH;
1059 			} else if (strcasecmp(optarg, "all") == 0) {
1060 				io_type = ZIO_TYPES;
1061 			} else {
1062 				(void) fprintf(stderr, "invalid I/O type "
1063 				    "'%s': must be 'read', 'write', 'free', "
1064 				    "'claim', 'flush' or 'all'\n", optarg);
1065 				usage();
1066 				libzfs_fini(g_zfs);
1067 				return (1);
1068 			}
1069 			break;
1070 		case 't':
1071 			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
1072 			    !MOS_TYPE(type)) {
1073 				(void) fprintf(stderr, "invalid type '%s'\n",
1074 				    optarg);
1075 				usage();
1076 				libzfs_fini(g_zfs);
1077 				return (1);
1078 			}
1079 			break;
1080 		case 'u':
1081 			flags |= ZINJECT_UNLOAD_SPA;
1082 			break;
1083 		case 'L':
1084 			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
1085 			    !LABEL_TYPE(type)) {
1086 				(void) fprintf(stderr, "invalid label type "
1087 				    "'%s'\n", optarg);
1088 				usage();
1089 				libzfs_fini(g_zfs);
1090 				return (1);
1091 			}
1092 			break;
1093 		case ':':
1094 			(void) fprintf(stderr, "option -%c requires an "
1095 			    "operand\n", optopt);
1096 			usage();
1097 			libzfs_fini(g_zfs);
1098 			return (1);
1099 		case '?':
1100 			(void) fprintf(stderr, "invalid option '%c'\n",
1101 			    optopt);
1102 			usage();
1103 			libzfs_fini(g_zfs);
1104 			return (2);
1105 		}
1106 	}
1107 
1108 	argc -= optind;
1109 	argv += optind;
1110 
1111 	if (record.zi_duration != 0 && record.zi_cmd == 0)
1112 		record.zi_cmd = ZINJECT_IGNORED_WRITES;
1113 
1114 	if (cancel != NULL) {
1115 		/*
1116 		 * '-c' is invalid with any other options.
1117 		 */
1118 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1119 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1120 		    record.zi_freq > 0 || dvas != 0) {
1121 			(void) fprintf(stderr, "cancel (-c) incompatible with "
1122 			    "any other options\n");
1123 			usage();
1124 			libzfs_fini(g_zfs);
1125 			return (2);
1126 		}
1127 		if (argc != 0) {
1128 			(void) fprintf(stderr, "extraneous argument to '-c'\n");
1129 			usage();
1130 			libzfs_fini(g_zfs);
1131 			return (2);
1132 		}
1133 
1134 		if (strcmp(cancel, "all") == 0) {
1135 			return (cancel_all_handlers());
1136 		} else {
1137 			int id = (int)strtol(cancel, &end, 10);
1138 			if (*end != '\0') {
1139 				(void) fprintf(stderr, "invalid handle id '%s':"
1140 				    " must be an integer or 'all'\n", cancel);
1141 				usage();
1142 				libzfs_fini(g_zfs);
1143 				return (1);
1144 			}
1145 			return (cancel_handler(id));
1146 		}
1147 	}
1148 
1149 	if (device != NULL) {
1150 		/*
1151 		 * Device (-d) injection uses a completely different mechanism
1152 		 * for doing injection, so handle it separately here.
1153 		 */
1154 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1155 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
1156 		    dvas != 0) {
1157 			(void) fprintf(stderr, "device (-d) incompatible with "
1158 			    "data error injection\n");
1159 			usage();
1160 			libzfs_fini(g_zfs);
1161 			return (2);
1162 		}
1163 
1164 		if (argc != 1) {
1165 			(void) fprintf(stderr, "device (-d) injection requires "
1166 			    "a single pool name\n");
1167 			usage();
1168 			libzfs_fini(g_zfs);
1169 			return (2);
1170 		}
1171 
1172 		(void) strlcpy(pool, argv[0], sizeof (pool));
1173 		dataset[0] = '\0';
1174 
1175 		if (error == ECKSUM) {
1176 			(void) fprintf(stderr, "device error type must be "
1177 			    "'io', 'nxio' or 'corrupt'\n");
1178 			libzfs_fini(g_zfs);
1179 			return (1);
1180 		}
1181 
1182 		if (error == EILSEQ &&
1183 		    (record.zi_freq == 0 || io_type != ZIO_TYPE_READ)) {
1184 			(void) fprintf(stderr, "device corrupt errors require "
1185 			    "io type read and a frequency value\n");
1186 			libzfs_fini(g_zfs);
1187 			return (1);
1188 		}
1189 
1190 		record.zi_iotype = io_type;
1191 		if (translate_device(pool, device, label, &record) != 0) {
1192 			libzfs_fini(g_zfs);
1193 			return (1);
1194 		}
1195 
1196 		if (record.zi_nlanes) {
1197 			switch (io_type) {
1198 			case ZIO_TYPE_READ:
1199 			case ZIO_TYPE_WRITE:
1200 			case ZIO_TYPES:
1201 				break;
1202 			default:
1203 				(void) fprintf(stderr, "I/O type for a delay "
1204 				    "must be 'read' or 'write'\n");
1205 				usage();
1206 				libzfs_fini(g_zfs);
1207 				return (1);
1208 			}
1209 		}
1210 
1211 		if (!error)
1212 			error = ENXIO;
1213 
1214 		if (action != VDEV_STATE_UNKNOWN)
1215 			return (perform_action(pool, &record, action));
1216 
1217 	} else if (raw != NULL) {
1218 		if (range != NULL || type != TYPE_INVAL || level != 0 ||
1219 		    record.zi_cmd != ZINJECT_UNINITIALIZED ||
1220 		    record.zi_freq > 0 || dvas != 0) {
1221 			(void) fprintf(stderr, "raw (-b) format with "
1222 			    "any other options\n");
1223 			usage();
1224 			libzfs_fini(g_zfs);
1225 			return (2);
1226 		}
1227 
1228 		if (argc != 1) {
1229 			(void) fprintf(stderr, "raw (-b) format expects a "
1230 			    "single pool name\n");
1231 			usage();
1232 			libzfs_fini(g_zfs);
1233 			return (2);
1234 		}
1235 
1236 		(void) strlcpy(pool, argv[0], sizeof (pool));
1237 		dataset[0] = '\0';
1238 
1239 		if (error == ENXIO) {
1240 			(void) fprintf(stderr, "data error type must be "
1241 			    "'checksum' or 'io'\n");
1242 			libzfs_fini(g_zfs);
1243 			return (1);
1244 		}
1245 
1246 		record.zi_cmd = ZINJECT_DATA_FAULT;
1247 		if (translate_raw(raw, &record) != 0) {
1248 			libzfs_fini(g_zfs);
1249 			return (1);
1250 		}
1251 		if (!error)
1252 			error = EIO;
1253 	} else if (record.zi_cmd == ZINJECT_PANIC) {
1254 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1255 		    level != 0 || device != NULL || record.zi_freq > 0 ||
1256 		    dvas != 0) {
1257 			(void) fprintf(stderr, "%s incompatible with other "
1258 			    "options\n", "import|export delay (-P)");
1259 			usage();
1260 			libzfs_fini(g_zfs);
1261 			return (2);
1262 		}
1263 
1264 		if (argc < 1 || argc > 2) {
1265 			(void) fprintf(stderr, "panic (-p) injection requires "
1266 			    "a single pool name and an optional id\n");
1267 			usage();
1268 			libzfs_fini(g_zfs);
1269 			return (2);
1270 		}
1271 
1272 		(void) strlcpy(pool, argv[0], sizeof (pool));
1273 		if (argv[1] != NULL)
1274 			record.zi_type = atoi(argv[1]);
1275 		dataset[0] = '\0';
1276 	} else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
1277 	    record.zi_cmd == ZINJECT_DELAY_EXPORT) {
1278 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1279 		    level != 0 || device != NULL || record.zi_freq > 0 ||
1280 		    dvas != 0) {
1281 			(void) fprintf(stderr, "%s incompatible with other "
1282 			    "options\n", "import|export delay (-P)");
1283 			usage();
1284 			libzfs_fini(g_zfs);
1285 			return (2);
1286 		}
1287 
1288 		if (argc != 1 || record.zi_duration <= 0) {
1289 			(void) fprintf(stderr, "import|export delay (-P) "
1290 			    "injection requires a duration (-s) and a single "
1291 			    "pool name\n");
1292 			usage();
1293 			libzfs_fini(g_zfs);
1294 			return (2);
1295 		}
1296 
1297 		(void) strlcpy(pool, argv[0], sizeof (pool));
1298 	} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
1299 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1300 		    level != 0 || record.zi_freq > 0 || dvas != 0) {
1301 			(void) fprintf(stderr, "hardware failure (-I) "
1302 			    "incompatible with other options\n");
1303 			usage();
1304 			libzfs_fini(g_zfs);
1305 			return (2);
1306 		}
1307 
1308 		if (nowrites == 0) {
1309 			(void) fprintf(stderr, "-s or -g meaningless "
1310 			    "without -I (ignore writes)\n");
1311 			usage();
1312 			libzfs_fini(g_zfs);
1313 			return (2);
1314 		} else if (dur_secs && dur_txg) {
1315 			(void) fprintf(stderr, "choose a duration either "
1316 			    "in seconds (-s) or a number of txgs (-g) "
1317 			    "but not both\n");
1318 			usage();
1319 			libzfs_fini(g_zfs);
1320 			return (2);
1321 		} else if (argc != 1) {
1322 			(void) fprintf(stderr, "ignore writes (-I) "
1323 			    "injection requires a single pool name\n");
1324 			usage();
1325 			libzfs_fini(g_zfs);
1326 			return (2);
1327 		}
1328 
1329 		(void) strlcpy(pool, argv[0], sizeof (pool));
1330 		dataset[0] = '\0';
1331 	} else if (type == TYPE_INVAL) {
1332 		if (flags == 0) {
1333 			(void) fprintf(stderr, "at least one of '-b', '-d', "
1334 			    "'-t', '-a', '-p', '-I' or '-u' "
1335 			    "must be specified\n");
1336 			usage();
1337 			libzfs_fini(g_zfs);
1338 			return (2);
1339 		}
1340 
1341 		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
1342 			(void) strlcpy(pool, argv[0], sizeof (pool));
1343 			dataset[0] = '\0';
1344 		} else if (argc != 0) {
1345 			(void) fprintf(stderr, "extraneous argument for "
1346 			    "'-f'\n");
1347 			usage();
1348 			libzfs_fini(g_zfs);
1349 			return (2);
1350 		}
1351 
1352 		flags |= ZINJECT_NULL;
1353 	} else {
1354 		if (argc != 1) {
1355 			(void) fprintf(stderr, "missing object\n");
1356 			usage();
1357 			libzfs_fini(g_zfs);
1358 			return (2);
1359 		}
1360 
1361 		if (error == ENXIO || error == EILSEQ) {
1362 			(void) fprintf(stderr, "data error type must be "
1363 			    "'checksum' or 'io'\n");
1364 			libzfs_fini(g_zfs);
1365 			return (1);
1366 		}
1367 
1368 		if (dvas != 0) {
1369 			if (error == EACCES || error == EINVAL) {
1370 				(void) fprintf(stderr, "the '-C' option may "
1371 				    "not be used with logical data errors "
1372 				    "'decrypt' and 'decompress'\n");
1373 				libzfs_fini(g_zfs);
1374 				return (1);
1375 			}
1376 
1377 			record.zi_dvas = dvas;
1378 		}
1379 
1380 		if (error == EACCES) {
1381 			if (type != TYPE_DATA) {
1382 				(void) fprintf(stderr, "decryption errors "
1383 				    "may only be injected for 'data' types\n");
1384 				libzfs_fini(g_zfs);
1385 				return (1);
1386 			}
1387 
1388 			record.zi_cmd = ZINJECT_DECRYPT_FAULT;
1389 			/*
1390 			 * Internally, ZFS actually uses ECKSUM for decryption
1391 			 * errors since EACCES is used to indicate the key was
1392 			 * not found.
1393 			 */
1394 			error = ECKSUM;
1395 		} else {
1396 			record.zi_cmd = ZINJECT_DATA_FAULT;
1397 		}
1398 
1399 		if (translate_record(type, argv[0], range, level, &record, pool,
1400 		    dataset) != 0) {
1401 			libzfs_fini(g_zfs);
1402 			return (1);
1403 		}
1404 		if (!error)
1405 			error = EIO;
1406 	}
1407 
1408 	/*
1409 	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
1410 	 * unload the pool, so that we trigger spa-wide reopen of metadata next
1411 	 * time we access the pool.
1412 	 */
1413 	if (dataset[0] != '\0' && domount) {
1414 		if ((zhp = zfs_open(g_zfs, dataset,
1415 		    ZFS_TYPE_DATASET)) == NULL) {
1416 			libzfs_fini(g_zfs);
1417 			return (1);
1418 		}
1419 		if (zfs_unmount(zhp, NULL, 0) != 0) {
1420 			libzfs_fini(g_zfs);
1421 			return (1);
1422 		}
1423 	}
1424 
1425 	record.zi_error = error;
1426 
1427 	ret = register_handler(pool, flags, &record, quiet);
1428 
1429 	if (dataset[0] != '\0' && domount)
1430 		ret = (zfs_mount(zhp, NULL, 0) != 0);
1431 
1432 	libzfs_fini(g_zfs);
1433 
1434 	return (ret);
1435 }
1436