/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 * Copyright 2015 RackTop Systems.
 * Copyright (c) 2016, Intel Corporation.
 */

/*
 * Pool import support functions.
 *
 * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
 * these commands are expected to run in the global zone, we can assume
 * that the devices are all readable when called.
 *
 * To import a pool, we rely on reading the configuration information from the
 * ZFS label of each device.  If we successfully read the label, then we
 * organize the configuration information in the following hierarchy:
 *
 *	pool guid -> toplevel vdev guid -> label txg
 *
 * Duplicate entries matching this same tuple will be discarded.  Once we have
 * examined every device, we pick the best label txg config for each toplevel
 * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
 * update any paths that have changed.  Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <libgen.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/fs.h>	/* BLKFLSBUF */
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>

#include <thread_pool.h>
#include <libzutil.h>
#include <libnvpair.h>
#include <libzfs.h>

#include "zutil_import.h"

#ifdef HAVE_LIBUDEV
#include <libudev.h>
#include <sched.h>
#endif
#include <blkid/blkid.h>

#define	DEV_BYID_PATH	"/dev/disk/by-id/"

/*
 * Skip devices with well-known prefixes; opening some devices has side
 * effects that must be avoided.
 *
 * hpet        - High Precision Event Timer
 * watchdog[N] - Watchdog must be closed in a special way.
 */
static boolean_t
should_skip_dev(const char *dev)
{
	return ((strcmp(dev, "watchdog") == 0) ||
	    (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) ||
	    (strcmp(dev, "hpet") == 0));
}

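/*
 * Flush the block device buffer cache (BLKFLSBUF) so that subsequent
 * label reads are not satisfied from stale cached data.
 */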
int
zfs_dev_flush(int fd)
{
	return (ioctl(fd, BLKFLSBUF));
}

void
zpool_open_func(void *arg)
{
	rdsk_node_t *rn = arg;
	libpc_handle_t *hdl = rn->rn_hdl;
	struct stat64 statbuf;
	nvlist_t *config;
	uint64_t vdev_guid = 0;
	int error;
	int num_labels = 0;
	int fd;

	if (should_skip_dev(zfs_basename(rn->rn_name)))
		return;

	/*
	 * Ignore failed stats.  We only want regular files and block devices.
	 * Ignore files that are too small to hold a zpool.
	 */
	if (stat64(rn->rn_name, &statbuf) != 0 ||
	    (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) ||
	    (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE))
		return;

	/*
	 * Preferentially open using O_DIRECT to bypass the block device
	 * cache which may be stale for multipath devices.  An EINVAL errno
	 * indicates O_DIRECT is unsupported so fall back to just O_RDONLY.
	 */
	fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
	if ((fd < 0) && (errno == EINVAL))
		fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC);
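	/* Note a permission failure so callers can surface the EACCES error. */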
	if ((fd < 0) && (errno == EACCES))
		hdl->lpc_open_access_error = B_TRUE;
	if (fd < 0)
		return;

	error = zpool_read_label(fd, &config, &num_labels);
	if (error != 0) {
		(void) close(fd);
		return;
	}

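	/* The device was readable but held no intact ZFS labels. */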
	if (num_labels == 0) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	/*
	 * Check that the vdev is for the expected guid.  Additional entries
	 * are speculatively added based on the paths stored in the labels.
	 * Entries with valid paths but incorrect guids must be removed.
	 */
	error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	(void) close(fd);

	rn->rn_config = config;
	rn->rn_num_labels = num_labels;

	/*
	 * Add additional entries for paths described by this label.
	 */
	if (rn->rn_labelpaths) {
		char *path = NULL;
		char *devid = NULL;
		char *env = NULL;
		rdsk_node_t *slice;
		avl_index_t where;
		int timeout;
		int error;

		if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
			return;

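		/*
		 * The udev wait timeout is tunable via the environment;
		 * missing or invalid values fall back to DISK_LABEL_WAIT.
		 */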
		env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
		if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
		    timeout < 0) {
			timeout = DISK_LABEL_WAIT;
		}

		/*
		 * Allow devlinks to stabilize so all paths are available.
		 */
		zpool_label_disk_wait(rn->rn_name, timeout);

		if (path != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			slice->rn_name = zutil_strdup(hdl, path);
			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_1;
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
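				/* Read the label at the newly added path. */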
				zpool_open_func(slice);
			}
		}

		if (devid != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			error = asprintf(&slice->rn_name, "%s%s",
			    DEV_BYID_PATH, devid);
			if (error == -1) {
				free(slice);
				return;
			}

			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_2;
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				zpool_open_func(slice);
			}
		}
	}
}

static const char * const
zpool_default_import_path[] = {
	"/dev/disk/by-vdev",	/* Custom rules, use first if they exist */
	"/dev/mapper",		/* Use multipath devices before components */
	"/dev/disk/by-partlabel", /* Single unique entry set by user */
	"/dev/disk/by-partuuid", /* Generated partition uuid */
	"/dev/disk/by-label",	/* Custom persistent labels */
	"/dev/disk/by-uuid",	/* Single unique entry and persistent */
	"/dev/disk/by-id",	/* May be multiple entries and persistent */
	"/dev/disk/by-path",	/* Encodes physical location and persistent */
	"/dev"			/* UNSAFE device names will change */
};

const char * const *
zpool_default_search_paths(size_t *count)
{
	*count = ARRAY_SIZE(zpool_default_import_path);
	return (zpool_default_import_path);
}
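
/*
 * Illustrative use only: callers typically walk the returned array in
 * priority order, for example:
 *
 *	size_t count;
 *	const char * const *dirs = zpool_default_search_paths(&count);
 *	for (size_t i = 0; i < count; i++)
 *		(void) printf("%s\n", dirs[i]);
 */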

/*
 * Given a full path to a device, determine whether that device appears in
 * the import search path.  If it does, return the first match and store the
 * index in the passed 'order' variable; otherwise return an error.
 */
static int
zfs_path_order(char *name, int *order)
{
	int i, error = ENOENT;
	char *dir, *env, *envdup, *tmp = NULL;

	env = getenv("ZPOOL_IMPORT_PATH");
	if (env) {
		envdup = strdup(env);
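		/* Walk the colon-separated ZPOOL_IMPORT_PATH list in order. */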
		for (dir = strtok_r(envdup, ":", &tmp), i = 0;
		    dir != NULL;
		    dir = strtok_r(NULL, ":", &tmp), i++) {
			if (strncmp(name, dir, strlen(dir)) == 0) {
				*order = i;
				error = 0;
				break;
			}
		}
		free(envdup);
	} else {
		for (i = 0; i < ARRAY_SIZE(zpool_default_import_path); i++) {
			if (strncmp(name, zpool_default_import_path[i],
			    strlen(zpool_default_import_path[i])) == 0) {
				*order = i;
				error = 0;
				break;
			}
		}
	}

	return (error);
}

/*
 * Use libblkid to quickly enumerate all known zfs devices.
 */
int
zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
    avl_tree_t **slice_cache)
{
	rdsk_node_t *slice;
	blkid_cache cache;
	blkid_dev_iterate iter;
	blkid_dev dev;
	avl_index_t where;
	int error;

	*slice_cache = NULL;

	error = blkid_get_cache(&cache, NULL);
	if (error != 0)
		return (error);

	error = blkid_probe_all_new(cache);
	if (error != 0) {
		blkid_put_cache(cache);
		return (error);
	}

	iter = blkid_dev_iterate_begin(cache);
	if (iter == NULL) {
		blkid_put_cache(cache);
		return (EINVAL);
	}

	error = blkid_dev_set_search(iter, "TYPE", "zfs_member");
	if (error != 0) {
		blkid_dev_iterate_end(iter);
		blkid_put_cache(cache);
		return (error);
	}

	*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
	    offsetof(rdsk_node_t, rn_node));

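	/* Add a cache entry for each device blkid tagged as a ZFS member. */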
	while (blkid_dev_next(iter, &dev) == 0) {
		slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
		slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
		slice->rn_vdev_guid = 0;
		slice->rn_lock = lock;
		slice->rn_avl = *slice_cache;
		slice->rn_hdl = hdl;
		slice->rn_labelpaths = B_TRUE;

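		/* Rank the device by its position in the import search path. */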
		error = zfs_path_order(slice->rn_name, &slice->rn_order);
		if (error == 0)
			slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
		else
			slice->rn_order = IMPORT_ORDER_DEFAULT;

		pthread_mutex_lock(lock);
		if (avl_find(*slice_cache, slice, &where)) {
			free(slice->rn_name);
			free(slice);
		} else {
			avl_insert(*slice_cache, slice, where);
		}
		pthread_mutex_unlock(lock);
	}

	blkid_dev_iterate_end(iter);
	blkid_put_cache(cache);

	return (0);
}

/*
 * Linux persistent device strings for vdev labels
 *
 * based on libudev for consistency with libudev disk add/remove events
 */

typedef struct vdev_dev_strs {
	char	vds_devid[128];
	char	vds_devphys[128];
} vdev_dev_strs_t;

#ifdef HAVE_LIBUDEV

/*
 * Obtain the persistent device id string (describes what)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	struct udev_list_entry *entry;
	const char *bus;
	char devbyid[MAXPATHLEN];

	/* The bus-based by-id path is preferred */
	bus = udev_device_get_property_value(dev, "ID_BUS");

	if (bus == NULL) {
		const char *dm_uuid;

		/*
		 * For multipath nodes, use the persistent UUID-based
		 * identifier.
		 *
		 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
		 */
		dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
		if (dm_uuid != NULL) {
			(void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
			return (0);
		}

		/*
		 * For volumes, use the persistent /dev/zvol/dataset
		 * identifier.
		 */
		entry = udev_device_get_devlinks_list_entry(dev);
		while (entry != NULL) {
			const char *name;

			name = udev_list_entry_get_name(entry);
			if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
				(void) strlcpy(bufptr, name, buflen);
				return (0);
			}
			entry = udev_list_entry_get_next(entry);
		}

		/*
		 * NVMe 'by-id' symlinks are similar to the bus case
		 */
		struct udev_device *parent;

		parent = udev_device_get_parent_with_subsystem_devtype(dev,
		    "nvme", NULL);
		if (parent != NULL)
			bus = "nvme";	/* continue with bus symlink search */
		else
			return (ENODATA);
	}

	/*
	 * Locate the bus-specific by-id link
	 */
	(void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		const char *name;

		name = udev_list_entry_get_name(entry);
		if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
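			/* Strip the "/dev/disk/by-id/" prefix. */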
			name += strlen(DEV_BYID_PATH);
			(void) strlcpy(bufptr, name, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	return (ENODATA);
}

/*
 * Obtain the persistent physical location string (describes where)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	const char *physpath = NULL;
	struct udev_list_entry *entry;

	/*
	 * Normal disks use ID_PATH for their physical path.
	 */
	physpath = udev_device_get_property_value(dev, "ID_PATH");
	if (physpath != NULL && strlen(physpath) > 0) {
		(void) strlcpy(bufptr, physpath, buflen);
		return (0);
	}

	/*
	 * Device mapper devices are virtual and don't have a physical
	 * path.  For them we use ID_VDEV instead, which is set up via
	 * the /etc/vdev_id.conf file.  ID_VDEV provides a persistent path
	 * to a virtual device.  If you don't have vdev_id.conf set up,
	 * you cannot use multipath autoreplace with device mapper.
	 */
	physpath = udev_device_get_property_value(dev, "ID_VDEV");
	if (physpath != NULL && strlen(physpath) > 0) {
		(void) strlcpy(bufptr, physpath, buflen);
		return (0);
	}

	/*
	 * For ZFS volumes, use the persistent /dev/zvol/dataset identifier.
	 */
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	/*
	 * For all other devices, fall back to using the by-uuid name.
	 */
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	return (ENODATA);
}

/*
 * A disk is considered a multipath whole disk when:
 *	DEVNAME key value has "dm-"
 *	DM_NAME key value has "mpath" prefix
 *	DM_UUID key exists
 *	ID_PART_TABLE_TYPE key does not exist or is not gpt
 */
static boolean_t
udev_mpath_whole_disk(struct udev_device *dev)
{
	const char *devname, *type, *uuid;

	devname = udev_device_get_property_value(dev, "DEVNAME");
	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
	uuid = udev_device_get_property_value(dev, "DM_UUID");

	if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
	    ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
	    (uuid != NULL)) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

static int
udev_device_is_ready(struct udev_device *dev)
{
#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
	return (udev_device_get_is_initialized(dev));
#else
	/* wait for DEVLINKS property to be initialized */
	return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
#endif
}

#else

int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev, (void) bufptr, (void) buflen;
	return (ENODATA);
}

int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev, (void) bufptr, (void) buflen;
	return (ENODATA);
}

#endif /* HAVE_LIBUDEV */

/*
 * Wait up to timeout_ms for udev to set up the device node.  The device is
 * considered ready when libudev determines it has been initialized, all of
 * the device links have been verified to exist, and it has been allowed to
 * settle.  At this point the device can be accessed reliably.  Depending on
 * the complexity of the udev rules this process could take several seconds.
 */
int
zpool_label_disk_wait(const char *path, int timeout_ms)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname = NULL;
	int ret = ENODEV;
	int settle_ms = 50;
	long sleep_ms = 10;
	hrtime_t start, settle;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	start = gethrtime();
	settle = 0;

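	/*
	 * Poll until the device node resolves and udev reports the device
	 * initialized, then verify every devlink exists before declaring
	 * the device settled.
	 */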
	do {
		if (sysname == NULL) {
			if (realpath(path, nodepath) != NULL) {
				sysname = strrchr(nodepath, '/') + 1;
			} else {
				(void) usleep(sleep_ms * MILLISEC);
				continue;
			}
		}

		dev = udev_device_new_from_subsystem_sysname(udev,
		    "block", sysname);
		if ((dev != NULL) && udev_device_is_ready(dev)) {
			struct udev_list_entry *links, *link = NULL;

			ret = 0;
			links = udev_device_get_devlinks_list_entry(dev);

			udev_list_entry_foreach(link, links) {
				struct stat64 statbuf;
				const char *name;

				name = udev_list_entry_get_name(link);
				errno = 0;
				if (stat64(name, &statbuf) == 0 && errno == 0)
					continue;

				settle = 0;
				ret = ENODEV;
				break;
			}

			if (ret == 0) {
				if (settle == 0) {
					settle = gethrtime();
				} else if (NSEC2MSEC(gethrtime() - settle) >=
				    settle_ms) {
					udev_device_unref(dev);
					break;
				}
			}
		}

		udev_device_unref(dev);
		(void) usleep(sleep_ms * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	udev_unref(udev);

	return (ret);
#else
	int settle_ms = 50;
	long sleep_ms = 10;
	hrtime_t start, settle;
	struct stat64 statbuf;

	start = gethrtime();
	settle = 0;

	do {
		errno = 0;
		if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
			if (settle == 0)
				settle = gethrtime();
			else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
				return (0);
		} else if (errno != ENOENT) {
			return (errno);
		}

		usleep(sleep_ms * MILLISEC);
	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	return (ENODEV);
#endif /* HAVE_LIBUDEV */
}

/*
 * Encode the persistent device strings
 * used for the vdev disk label
 */
static int
encode_device_strings(const char *path, vdev_dev_strs_t *ds,
    boolean_t wholedisk)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname;
	int ret = ENODEV;
	hrtime_t start;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	/* resolve path to a runtime device node instance */
	if (realpath(path, nodepath) == NULL)
		goto no_dev;

	sysname = strrchr(nodepath, '/') + 1;

	/*
	 * Wait up to 3 seconds for udev to set up the device node context
	 */
	start = gethrtime();
	do {
		dev = udev_device_new_from_subsystem_sysname(udev, "block",
		    sysname);
		if (dev == NULL)
			goto no_dev;
		if (udev_device_is_ready(dev))
			break;  /* udev ready */

		udev_device_unref(dev);
		dev = NULL;

		if (NSEC2MSEC(gethrtime() - start) < 10)
			(void) sched_yield();	/* yield/busy wait up to 10ms */
		else
			(void) usleep(10 * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));

	if (dev == NULL)
		goto no_dev;

	/*
	 * Only whole disks require extra device strings
	 */
	if (!wholedisk && !udev_mpath_whole_disk(dev))
		goto no_dev;

	ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
	if (ret != 0)
		goto no_dev_ref;

	/* physical location string (optional) */
	if (zfs_device_get_physical(dev, ds->vds_devphys,
	    sizeof (ds->vds_devphys)) != 0) {
		ds->vds_devphys[0] = '\0'; /* empty string --> not available */
	}

no_dev_ref:
	udev_device_unref(dev);
no_dev:
	udev_unref(udev);

	return (ret);
#else
	(void) path;
	(void) ds;
	(void) wholedisk;
	return (ENOENT);
#endif
}

/*
 * Rescan the enclosure sysfs path used for turning on enclosure LEDs and
 * store it in the nvlist (if applicable).  For example:
 *    vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
 */
static void
update_vdev_config_dev_sysfs_path(nvlist_t *nv, char *path)
{
	char *upath, *spath;

	/* Add enclosure sysfs path (if disk is in an enclosure). */
	upath = zfs_get_underlying_path(path);
	spath = zfs_get_enclosure_sysfs_path(upath);

	if (spath) {
		nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, spath);
	} else {
		nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	}

	free(upath);
	free(spath);
}

/*
 * This will get called for each leaf vdev.
 */
static int
sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
{
	(void) hdl_data, (void) data;

	char *path = NULL;
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return (1);

	/* Rescan our enclosure sysfs path for this vdev */
	update_vdev_config_dev_sysfs_path(nv, path);
	return (0);
}

/*
 * Given an nvlist for our pool (with vdev tree), iterate over all the
 * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH.
 */
void
update_vdevs_config_dev_sysfs_path(nvlist_t *config)
{
	nvlist_t *nvroot = NULL;
	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL);
}

/*
 * Update a leaf vdev's persistent device strings
 *
 * - only applies for a dedicated leaf vdev (aka whole disk)
 * - updated during pool create|add|attach|import
 * - used for device matching during auto-{online,expand,replace}
 * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
 * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
 *
 * single device node example:
 * 	devid:		'scsi-MG03SCA300_350000494a8cb3d67-part1'
 * 	phys_path:	'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
 *
 * multipath device node example:
 * 	devid:		'dm-uuid-mpath-35000c5006304de3f'
 *
 * We also store the enclosure sysfs path for turning on enclosure LEDs
 * (if applicable):
 *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
 */
void
update_vdev_config_dev_strs(nvlist_t *nv)
{
	vdev_dev_strs_t vds;
	char *env, *type, *path;
	uint64_t wholedisk = 0;

	/*
	 * For the benefit of legacy ZFS implementations, allow
	 * for opting out of devid strings in the vdev label.
	 *
	 * example use:
	 *	env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
	 *
	 * explanation:
	 * Older OpenZFS implementations had issues when attempting to
	 * display pool config VDEV names if a "devid" NVP value is
	 * present in the pool's config.
	 *
	 * For example, a pool that originated on illumos platform would
	 * have a devid value in the config and "zpool status" would fail
	 * when listing the config.
	 *
	 * A pool can be stripped of any "devid" values on import or
	 * prevented from adding them on zpool create|add by setting
	 * ZFS_VDEV_DEVID_OPT_OUT.
	 */
	env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
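	/* Any nonzero number, "YES", or "ON" (case-insensitive) opts out. */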
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		return;
	}

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    strcmp(type, VDEV_TYPE_DISK) != 0) {
		return;
	}
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return;
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);

	/*
	 * Update device string values in the config nvlist.
	 */
	if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
		(void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
		if (vds.vds_devphys[0] != '\0') {
			(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
			    vds.vds_devphys);
		}
		update_vdev_config_dev_sysfs_path(nv, path);
	} else {
		/* Clear out any stale entries. */
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	}
}