1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
25  * Copyright 2015 RackTop Systems.
26  * Copyright (c) 2016, Intel Corporation.
27  */
28 
29 /*
30  * Pool import support functions.
31  *
32  * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
33  * these commands are expected to run in the global zone, we can assume
34  * that the devices are all readable when called.
35  *
36  * To import a pool, we rely on reading the configuration information from the
37  * ZFS label of each device.  If we successfully read the label, then we
38  * organize the configuration information in the following hierarchy:
39  *
40  *	pool guid -> toplevel vdev guid -> label txg
41  *
42  * Duplicate entries matching this same tuple will be discarded.  Once we have
43  * examined every device, we pick the best label txg config for each toplevel
44  * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
45  * update any paths that have changed.  Finally, we attempt to import the pool
46  * using our derived config, and record the results.
47  */
48 
49 #include <ctype.h>
50 #include <dirent.h>
51 #include <errno.h>
52 #include <libintl.h>
53 #include <libgen.h>
54 #include <stddef.h>
55 #include <stdlib.h>
56 #include <stdio.h>
57 #include <string.h>
58 #include <sys/stat.h>
59 #include <unistd.h>
60 #include <fcntl.h>
61 #include <sys/dktp/fdisk.h>
62 #include <sys/vdev_impl.h>
63 #include <sys/fs/zfs.h>
64 
65 #include <thread_pool.h>
66 #include <libzutil.h>
67 #include <libnvpair.h>
68 
69 #include "zutil_import.h"
70 
71 #ifdef HAVE_LIBUDEV
72 #include <libudev.h>
73 #include <sched.h>
74 #endif
75 #include <blkid/blkid.h>
76 
77 #define	DEV_BYID_PATH	"/dev/disk/by-id/"
78 
79 /*
80  * Skip devices with well known prefixes:
81  * there can be side effects when opening devices which need to be avoided.
82  *
83  * hpet        - High Precision Event Timer
84  * watchdog[N] - Watchdog must be closed in a special way.
85  */
86 static boolean_t
87 should_skip_dev(const char *dev)
88 {
89 	return ((strcmp(dev, "watchdog") == 0) ||
90 	    (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) ||
91 	    (strcmp(dev, "hpet") == 0));
92 }
93 
/*
 * Flush the page cache of the block device referenced by fd using the
 * Linux BLKFLSBUF ioctl.  Returns the ioctl result (0 on success, -1 with
 * errno set on failure).
 */
int
zfs_dev_flush(int fd)
{
	int rc;

	rc = ioctl(fd, BLKFLSBUF);
	return (rc);
}
99 
100 void
101 zpool_open_func(void *arg)
102 {
103 	rdsk_node_t *rn = arg;
104 	libpc_handle_t *hdl = rn->rn_hdl;
105 	struct stat64 statbuf;
106 	nvlist_t *config;
107 	uint64_t vdev_guid = 0;
108 	int error;
109 	int num_labels = 0;
110 	int fd;
111 
112 	if (should_skip_dev(zfs_basename(rn->rn_name)))
113 		return;
114 
115 	/*
116 	 * Ignore failed stats.  We only want regular files and block devices.
117 	 * Ignore files that are too small to hold a zpool.
118 	 */
119 	if (stat64(rn->rn_name, &statbuf) != 0 ||
120 	    (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) ||
121 	    (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE))
122 		return;
123 
124 	/*
125 	 * Preferentially open using O_DIRECT to bypass the block device
126 	 * cache which may be stale for multipath devices.  An EINVAL errno
127 	 * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
128 	 */
129 	fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
130 	if ((fd < 0) && (errno == EINVAL))
131 		fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC);
132 	if ((fd < 0) && (errno == EACCES))
133 		hdl->lpc_open_access_error = B_TRUE;
134 	if (fd < 0)
135 		return;
136 
137 	error = zpool_read_label(fd, &config, &num_labels);
138 	if (error != 0) {
139 		(void) close(fd);
140 		return;
141 	}
142 
143 	if (num_labels == 0) {
144 		(void) close(fd);
145 		nvlist_free(config);
146 		return;
147 	}
148 
149 	/*
150 	 * Check that the vdev is for the expected guid.  Additional entries
151 	 * are speculatively added based on the paths stored in the labels.
152 	 * Entries with valid paths but incorrect guids must be removed.
153 	 */
154 	error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
155 	if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
156 		(void) close(fd);
157 		nvlist_free(config);
158 		return;
159 	}
160 
161 	(void) close(fd);
162 
163 	rn->rn_config = config;
164 	rn->rn_num_labels = num_labels;
165 
166 	/*
167 	 * Add additional entries for paths described by this label.
168 	 */
169 	if (rn->rn_labelpaths) {
170 		char *path = NULL;
171 		char *devid = NULL;
172 		char *env = NULL;
173 		rdsk_node_t *slice;
174 		avl_index_t where;
175 		int timeout;
176 		int error;
177 
178 		if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
179 			return;
180 
181 		env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
182 		if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
183 		    timeout < 0) {
184 			timeout = DISK_LABEL_WAIT;
185 		}
186 
187 		/*
188 		 * Allow devlinks to stabilize so all paths are available.
189 		 */
190 		zpool_label_disk_wait(rn->rn_name, timeout);
191 
192 		if (path != NULL) {
193 			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
194 			slice->rn_name = zutil_strdup(hdl, path);
195 			slice->rn_vdev_guid = vdev_guid;
196 			slice->rn_avl = rn->rn_avl;
197 			slice->rn_hdl = hdl;
198 			slice->rn_order = IMPORT_ORDER_PREFERRED_1;
199 			slice->rn_labelpaths = B_FALSE;
200 			pthread_mutex_lock(rn->rn_lock);
201 			if (avl_find(rn->rn_avl, slice, &where)) {
202 			pthread_mutex_unlock(rn->rn_lock);
203 				free(slice->rn_name);
204 				free(slice);
205 			} else {
206 				avl_insert(rn->rn_avl, slice, where);
207 				pthread_mutex_unlock(rn->rn_lock);
208 				zpool_open_func(slice);
209 			}
210 		}
211 
212 		if (devid != NULL) {
213 			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
214 			error = asprintf(&slice->rn_name, "%s%s",
215 			    DEV_BYID_PATH, devid);
216 			if (error == -1) {
217 				free(slice);
218 				return;
219 			}
220 
221 			slice->rn_vdev_guid = vdev_guid;
222 			slice->rn_avl = rn->rn_avl;
223 			slice->rn_hdl = hdl;
224 			slice->rn_order = IMPORT_ORDER_PREFERRED_2;
225 			slice->rn_labelpaths = B_FALSE;
226 			pthread_mutex_lock(rn->rn_lock);
227 			if (avl_find(rn->rn_avl, slice, &where)) {
228 				pthread_mutex_unlock(rn->rn_lock);
229 				free(slice->rn_name);
230 				free(slice);
231 			} else {
232 				avl_insert(rn->rn_avl, slice, where);
233 				pthread_mutex_unlock(rn->rn_lock);
234 				zpool_open_func(slice);
235 			}
236 		}
237 	}
238 }
239 
/*
 * Default directories searched for device nodes during import, ordered
 * from most preferred to least preferred namespace.  The array index is
 * also used as the name's ranking (see zfs_path_order() below).
 */
static const char * const
zpool_default_import_path[] = {
	"/dev/disk/by-vdev",	/* Custom rules, use first if they exist */
	"/dev/mapper",		/* Use multipath devices before components */
	"/dev/disk/by-partlabel", /* Single unique entry set by user */
	"/dev/disk/by-partuuid", /* Generated partition uuid */
	"/dev/disk/by-label",	/* Custom persistent labels */
	"/dev/disk/by-uuid",	/* Single unique entry and persistent */
	"/dev/disk/by-id",	/* May be multiple entries and persistent */
	"/dev/disk/by-path",	/* Encodes physical location and persistent */
	"/dev"			/* UNSAFE device names will change */
};
252 
253 const char * const *
254 zpool_default_search_paths(size_t *count)
255 {
256 	*count = ARRAY_SIZE(zpool_default_import_path);
257 	return (zpool_default_import_path);
258 }
259 
260 /*
261  * Given a full path to a device determine if that device appears in the
262  * import search path.  If it does return the first match and store the
263  * index in the passed 'order' variable, otherwise return an error.
264  */
265 static int
266 zfs_path_order(char *name, int *order)
267 {
268 	int i, error = ENOENT;
269 	char *dir, *env, *envdup, *tmp = NULL;
270 
271 	env = getenv("ZPOOL_IMPORT_PATH");
272 	if (env) {
273 		envdup = strdup(env);
274 		for (dir = strtok_r(envdup, ":", &tmp), i = 0;
275 		    dir != NULL;
276 		    dir = strtok_r(NULL, ":", &tmp), i++) {
277 			if (strncmp(name, dir, strlen(dir)) == 0) {
278 				*order = i;
279 				error = 0;
280 				break;
281 			}
282 		}
283 		free(envdup);
284 	} else {
285 		for (i = 0; i < ARRAY_SIZE(zpool_default_import_path); i++) {
286 			if (strncmp(name, zpool_default_import_path[i],
287 			    strlen(zpool_default_import_path[i])) == 0) {
288 				*order = i;
289 				error = 0;
290 				break;
291 			}
292 		}
293 	}
294 
295 	return (error);
296 }
297 
298 /*
299  * Use libblkid to quickly enumerate all known zfs devices.
300  */
301 int
302 zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
303     avl_tree_t **slice_cache)
304 {
305 	rdsk_node_t *slice;
306 	blkid_cache cache;
307 	blkid_dev_iterate iter;
308 	blkid_dev dev;
309 	avl_index_t where;
310 	int error;
311 
312 	*slice_cache = NULL;
313 
314 	error = blkid_get_cache(&cache, NULL);
315 	if (error != 0)
316 		return (error);
317 
318 	error = blkid_probe_all_new(cache);
319 	if (error != 0) {
320 		blkid_put_cache(cache);
321 		return (error);
322 	}
323 
324 	iter = blkid_dev_iterate_begin(cache);
325 	if (iter == NULL) {
326 		blkid_put_cache(cache);
327 		return (EINVAL);
328 	}
329 
330 	error = blkid_dev_set_search(iter, "TYPE", "zfs_member");
331 	if (error != 0) {
332 		blkid_dev_iterate_end(iter);
333 		blkid_put_cache(cache);
334 		return (error);
335 	}
336 
337 	*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
338 	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
339 	    offsetof(rdsk_node_t, rn_node));
340 
341 	while (blkid_dev_next(iter, &dev) == 0) {
342 		slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
343 		slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
344 		slice->rn_vdev_guid = 0;
345 		slice->rn_lock = lock;
346 		slice->rn_avl = *slice_cache;
347 		slice->rn_hdl = hdl;
348 		slice->rn_labelpaths = B_TRUE;
349 
350 		error = zfs_path_order(slice->rn_name, &slice->rn_order);
351 		if (error == 0)
352 			slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
353 		else
354 			slice->rn_order = IMPORT_ORDER_DEFAULT;
355 
356 		pthread_mutex_lock(lock);
357 		if (avl_find(*slice_cache, slice, &where)) {
358 			free(slice->rn_name);
359 			free(slice);
360 		} else {
361 			avl_insert(*slice_cache, slice, where);
362 		}
363 		pthread_mutex_unlock(lock);
364 	}
365 
366 	blkid_dev_iterate_end(iter);
367 	blkid_put_cache(cache);
368 
369 	return (0);
370 }
371 
372 /*
373  * Linux persistent device strings for vdev labels
374  *
375  * based on libudev for consistency with libudev disk add/remove events
376  */
377 
/*
 * Persistent device strings stored in a leaf vdev's label:
 * vds_devid identifies *what* the device is (by-id name) and
 * vds_devphys identifies *where* it is (physical path).
 */
typedef struct vdev_dev_strs {
	char	vds_devid[128];
	char	vds_devphys[128];
} vdev_dev_strs_t;
382 
383 #ifdef HAVE_LIBUDEV
384 
385 /*
386  * Obtain the persistent device id string (describes what)
387  *
388  * used by ZED vdev matching for auto-{online,expand,replace}
389  */
390 int
391 zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
392 {
393 	struct udev_list_entry *entry;
394 	const char *bus;
395 	char devbyid[MAXPATHLEN];
396 
397 	/* The bus based by-id path is preferred */
398 	bus = udev_device_get_property_value(dev, "ID_BUS");
399 
400 	if (bus == NULL) {
401 		const char *dm_uuid;
402 
403 		/*
404 		 * For multipath nodes use the persistent uuid based identifier
405 		 *
406 		 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
407 		 */
408 		dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
409 		if (dm_uuid != NULL) {
410 			(void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
411 			return (0);
412 		}
413 
414 		/*
415 		 * For volumes use the persistent /dev/zvol/dataset identifier
416 		 */
417 		entry = udev_device_get_devlinks_list_entry(dev);
418 		while (entry != NULL) {
419 			const char *name;
420 
421 			name = udev_list_entry_get_name(entry);
422 			if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
423 				(void) strlcpy(bufptr, name, buflen);
424 				return (0);
425 			}
426 			entry = udev_list_entry_get_next(entry);
427 		}
428 
429 		/*
430 		 * NVME 'by-id' symlinks are similar to bus case
431 		 */
432 		struct udev_device *parent;
433 
434 		parent = udev_device_get_parent_with_subsystem_devtype(dev,
435 		    "nvme", NULL);
436 		if (parent != NULL)
437 			bus = "nvme";	/* continue with bus symlink search */
438 		else
439 			return (ENODATA);
440 	}
441 
442 	/*
443 	 * locate the bus specific by-id link
444 	 */
445 	(void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
446 	entry = udev_device_get_devlinks_list_entry(dev);
447 	while (entry != NULL) {
448 		const char *name;
449 
450 		name = udev_list_entry_get_name(entry);
451 		if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
452 			name += strlen(DEV_BYID_PATH);
453 			(void) strlcpy(bufptr, name, buflen);
454 			return (0);
455 		}
456 		entry = udev_list_entry_get_next(entry);
457 	}
458 
459 	return (ENODATA);
460 }
461 
462 /*
463  * Obtain the persistent physical location string (describes where)
464  *
465  * used by ZED vdev matching for auto-{online,expand,replace}
466  */
467 int
468 zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
469 {
470 	const char *physpath = NULL;
471 	struct udev_list_entry *entry;
472 
473 	/*
474 	 * Normal disks use ID_PATH for their physical path.
475 	 */
476 	physpath = udev_device_get_property_value(dev, "ID_PATH");
477 	if (physpath != NULL && strlen(physpath) > 0) {
478 		(void) strlcpy(bufptr, physpath, buflen);
479 		return (0);
480 	}
481 
482 	/*
483 	 * Device mapper devices are virtual and don't have a physical
484 	 * path. For them we use ID_VDEV instead, which is setup via the
485 	 * /etc/vdev_id.conf file.  ID_VDEV provides a persistent path
486 	 * to a virtual device.  If you don't have vdev_id.conf setup,
487 	 * you cannot use multipath autoreplace with device mapper.
488 	 */
489 	physpath = udev_device_get_property_value(dev, "ID_VDEV");
490 	if (physpath != NULL && strlen(physpath) > 0) {
491 		(void) strlcpy(bufptr, physpath, buflen);
492 		return (0);
493 	}
494 
495 	/*
496 	 * For ZFS volumes use the persistent /dev/zvol/dataset identifier
497 	 */
498 	entry = udev_device_get_devlinks_list_entry(dev);
499 	while (entry != NULL) {
500 		physpath = udev_list_entry_get_name(entry);
501 		if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
502 			(void) strlcpy(bufptr, physpath, buflen);
503 			return (0);
504 		}
505 		entry = udev_list_entry_get_next(entry);
506 	}
507 
508 	/*
509 	 * For all other devices fallback to using the by-uuid name.
510 	 */
511 	entry = udev_device_get_devlinks_list_entry(dev);
512 	while (entry != NULL) {
513 		physpath = udev_list_entry_get_name(entry);
514 		if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
515 			(void) strlcpy(bufptr, physpath, buflen);
516 			return (0);
517 		}
518 		entry = udev_list_entry_get_next(entry);
519 	}
520 
521 	return (ENODATA);
522 }
523 
524 /*
525  * A disk is considered a multipath whole disk when:
526  *	DEVNAME key value has "dm-"
527  *	DM_NAME key value has "mpath" prefix
528  *	DM_UUID key exists
529  *	ID_PART_TABLE_TYPE key does not exist or is not gpt
530  */
531 static boolean_t
532 udev_mpath_whole_disk(struct udev_device *dev)
533 {
534 	const char *devname, *type, *uuid;
535 
536 	devname = udev_device_get_property_value(dev, "DEVNAME");
537 	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
538 	uuid = udev_device_get_property_value(dev, "DM_UUID");
539 
540 	if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
541 	    ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
542 	    (uuid != NULL)) {
543 		return (B_TRUE);
544 	}
545 
546 	return (B_FALSE);
547 }
548 
/*
 * Return nonzero once udev has finished initializing 'dev' (rules have run
 * and properties are populated); return 0 while the node is still settling.
 */
static int
udev_device_is_ready(struct udev_device *dev)
{
#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
	return (udev_device_get_is_initialized(dev));
#else
	/* wait for DEVLINKS property to be initialized */
	return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
#endif
}
559 
560 #else
561 
/* ARGSUSED */
/* Stub used when built without libudev: no devid can be determined. */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	return (ENODATA);
}
568 
/* ARGSUSED */
/* Stub used when built without libudev: no physical path can be found. */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	return (ENODATA);
}
575 
576 #endif /* HAVE_LIBUDEV */
577 
578 /*
579  * Wait up to timeout_ms for udev to set up the device node.  The device is
580  * considered ready when libudev determines it has been initialized, all of
581  * the device links have been verified to exist, and it has been allowed to
 * settle.  At this point the device can be accessed reliably.
583  * Depending on the complexity of the udev rules this process could take
584  * several seconds.
585  */
int
zpool_label_disk_wait(const char *path, int timeout_ms)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname = NULL;
	int ret = ENODEV;
	int settle_ms = 50;	/* device must stay ready this long */
	long sleep_ms = 10;	/* polling interval between attempts */
	hrtime_t start, settle;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	start = gethrtime();
	settle = 0;

	do {
		/*
		 * Resolve the path to a kernel sysname once the node
		 * appears; until then keep polling.
		 */
		if (sysname == NULL) {
			if (realpath(path, nodepath) != NULL) {
				sysname = strrchr(nodepath, '/') + 1;
			} else {
				(void) usleep(sleep_ms * MILLISEC);
				continue;
			}
		}

		dev = udev_device_new_from_subsystem_sysname(udev,
		    "block", sysname);
		if ((dev != NULL) && udev_device_is_ready(dev)) {
			struct udev_list_entry *links, *link = NULL;

			ret = 0;
			links = udev_device_get_devlinks_list_entry(dev);

			/* Verify every advertised devlink actually exists. */
			udev_list_entry_foreach(link, links) {
				struct stat64 statbuf;
				const char *name;

				name = udev_list_entry_get_name(link);
				errno = 0;
				/*
				 * NOTE(review): errno is only meaningful when
				 * stat64() fails; the errno == 0 test on the
				 * success path looks redundant — confirm.
				 */
				if (stat64(name, &statbuf) == 0 && errno == 0)
					continue;

				/* Missing link: restart the settle timer. */
				settle = 0;
				ret = ENODEV;
				break;
			}

			/*
			 * All links present: require them to remain present
			 * for settle_ms before declaring the device ready.
			 */
			if (ret == 0) {
				if (settle == 0) {
					settle = gethrtime();
				} else if (NSEC2MSEC(gethrtime() - settle) >=
				    settle_ms) {
					udev_device_unref(dev);
					break;
				}
			}
		}

		udev_device_unref(dev);
		(void) usleep(sleep_ms * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	udev_unref(udev);

	return (ret);
#else
	int settle_ms = 50;	/* device must stay present this long */
	long sleep_ms = 10;	/* polling interval between attempts */
	hrtime_t start, settle;
	struct stat64 statbuf;

	start = gethrtime();
	settle = 0;

	do {
		errno = 0;
		/* Same settle logic as above, but based on stat64() alone. */
		if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
			if (settle == 0)
				settle = gethrtime();
			else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
				return (0);
		} else if (errno != ENOENT) {
			/* Unexpected stat failure: report it to the caller. */
			return (errno);
		}

		usleep(sleep_ms * MILLISEC);
	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	return (ENODEV);
#endif /* HAVE_LIBUDEV */
}
682 
683 /*
684  * Encode the persistent devices strings
685  * used for the vdev disk label
686  */
static int
encode_device_strings(const char *path, vdev_dev_strs_t *ds,
    boolean_t wholedisk)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname;
	int ret = ENODEV;
	hrtime_t start;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	/* resolve path to a runtime device node instance */
	if (realpath(path, nodepath) == NULL)
		goto no_dev;

	/* device nodes always contain a '/', e.g. /dev/sda */
	sysname = strrchr(nodepath, '/') + 1;

	/*
	 * Wait up to 3 seconds for udev to set up the device node context
	 * (presumably 3 * MILLISEC == 3000 ms — confirm MILLISEC value).
	 * Busy-wait via sched_yield() for the first 10ms, then back off to
	 * sleeping in 10ms increments.
	 */
	start = gethrtime();
	do {
		dev = udev_device_new_from_subsystem_sysname(udev, "block",
		    sysname);
		if (dev == NULL)
			goto no_dev;
		if (udev_device_is_ready(dev))
			break;  /* udev ready */

		udev_device_unref(dev);
		dev = NULL;

		if (NSEC2MSEC(gethrtime() - start) < 10)
			(void) sched_yield();	/* yield/busy wait up to 10ms */
		else
			(void) usleep(10 * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));

	if (dev == NULL)
		goto no_dev;

	/*
	 * Only whole disks require extra device strings
	 *
	 * NOTE(review): this path skips udev_device_unref(dev) — the device
	 * reference appears to leak here; confirm whether that is intended.
	 */
	if (!wholedisk && !udev_mpath_whole_disk(dev))
		goto no_dev;

	/* devid string (describes what the device is); required */
	ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
	if (ret != 0)
		goto no_dev_ref;

	/* physical location string (optional) */
	if (zfs_device_get_physical(dev, ds->vds_devphys,
	    sizeof (ds->vds_devphys)) != 0) {
		ds->vds_devphys[0] = '\0'; /* empty string --> not available */
	}

no_dev_ref:
	udev_device_unref(dev);
no_dev:
	udev_unref(udev);

	return (ret);
#else
	/* Without libudev no persistent strings can be encoded. */
	return (ENOENT);
#endif
}
759 
760 /*
761  * Update a leaf vdev's persistent device strings
762  *
763  * - only applies for a dedicated leaf vdev (aka whole disk)
764  * - updated during pool create|add|attach|import
 * - used for device matching during auto-{online,expand,replace}
766  * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
767  * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
768  *
769  * single device node example:
770  * 	devid:		'scsi-MG03SCA300_350000494a8cb3d67-part1'
771  * 	phys_path:	'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
772  *
773  * multipath device node example:
774  * 	devid:		'dm-uuid-mpath-35000c5006304de3f'
775  *
776  * We also store the enclosure sysfs path for turning on enclosure LEDs
777  * (if applicable):
778  *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
779  */
void
update_vdev_config_dev_strs(nvlist_t *nv)
{
	vdev_dev_strs_t vds;
	char *env, *type, *path;
	uint64_t wholedisk = 0;
	char *upath, *spath;

	/*
	 * For the benefit of legacy ZFS implementations, allow
	 * for opting out of devid strings in the vdev label.
	 *
	 * example use:
	 *	env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
	 *
	 * explanation:
	 * Older OpenZFS implementations had issues when attempting to
	 * display pool config VDEV names if a "devid" NVP value is
	 * present in the pool's config.
	 *
	 * For example, a pool that originated on illumos platform would
	 * have a devid value in the config and "zpool status" would fail
	 * when listing the config.
	 *
	 * A pool can be stripped of any "devid" values on import or
	 * prevented from adding them on zpool create|add by setting
	 * ZFS_VDEV_DEVID_OPT_OUT.
	 */
	env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
		/* Opt-out requested: strip devid/phys_path and stop. */
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		return;
	}

	/* Only leaf "disk" vdevs with a path carry device strings. */
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    strcmp(type, VDEV_TYPE_DISK) != 0) {
		return;
	}
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return;
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);

	/*
	 * Update device string values in the config nvlist.
	 */
	if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
		(void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
		if (vds.vds_devphys[0] != '\0') {
			(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
			    vds.vds_devphys);
		}

		/* Add enclosure sysfs path (if disk is in an enclosure). */
		upath = zfs_get_underlying_path(path);
		spath = zfs_get_enclosure_sysfs_path(upath);
		if (spath)
			nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
			    spath);
		else
			nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);

		/* free(NULL) is a no-op, so unconditional frees are safe. */
		free(upath);
		free(spath);
	} else {
		/* Clear out any stale entries. */
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	}
}
852