1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 25 * Copyright 2015 RackTop Systems. 26 * Copyright (c) 2016, Intel Corporation. 27 */ 28 29 /* 30 * Pool import support functions. 31 * 32 * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since 33 * these commands are expected to run in the global zone, we can assume 34 * that the devices are all readable when called. 35 * 36 * To import a pool, we rely on reading the configuration information from the 37 * ZFS label of each device. If we successfully read the label, then we 38 * organize the configuration information in the following hierarchy: 39 * 40 * pool guid -> toplevel vdev guid -> label txg 41 * 42 * Duplicate entries matching this same tuple will be discarded. Once we have 43 * examined every device, we pick the best label txg config for each toplevel 44 * vdev. We then arrange these toplevel vdevs into a complete pool config, and 45 * update any paths that have changed. Finally, we attempt to import the pool 46 * using our derived config, and record the results. 47 */ 48 49 #include <ctype.h> 50 #include <dirent.h> 51 #include <errno.h> 52 #include <libintl.h> 53 #include <libgen.h> 54 #include <stddef.h> 55 #include <stdlib.h> 56 #include <stdio.h> 57 #include <string.h> 58 #include <sys/stat.h> 59 #include <unistd.h> 60 #include <fcntl.h> 61 #include <sys/dktp/fdisk.h> 62 #include <sys/vdev_impl.h> 63 #include <sys/fs/zfs.h> 64 65 #include <thread_pool.h> 66 #include <libzutil.h> 67 #include <libnvpair.h> 68 #include <libzfs.h> 69 70 #include "zutil_import.h" 71 72 #ifdef HAVE_LIBUDEV 73 #include <libudev.h> 74 #include <sched.h> 75 #endif 76 #include <blkid/blkid.h> 77 78 #define DEV_BYID_PATH "/dev/disk/by-id/" 79 80 /* 81 * Skip devices with well known prefixes: 82 * there can be side effects when opening devices which need to be avoided. 83 * 84 * hpet - High Precision Event Timer 85 * watchdog[N] - Watchdog must be closed in a special way. 86 */ 87 static boolean_t 88 should_skip_dev(const char *dev) 89 { 90 return ((strcmp(dev, "watchdog") == 0) || 91 (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) || 92 (strcmp(dev, "hpet") == 0)); 93 } 94 95 int 96 zfs_dev_flush(int fd) 97 { 98 return (ioctl(fd, BLKFLSBUF)); 99 } 100 101 void 102 zpool_open_func(void *arg) 103 { 104 rdsk_node_t *rn = arg; 105 libpc_handle_t *hdl = rn->rn_hdl; 106 struct stat64 statbuf; 107 nvlist_t *config; 108 uint64_t vdev_guid = 0; 109 int error; 110 int num_labels = 0; 111 int fd; 112 113 if (should_skip_dev(zfs_basename(rn->rn_name))) 114 return; 115 116 /* 117 * Ignore failed stats. We only want regular files and block devices. 118 * Ignore files that are too small to hold a zpool. 119 */ 120 if (stat64(rn->rn_name, &statbuf) != 0 || 121 (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) || 122 (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE)) 123 return; 124 125 /* 126 * Preferentially open using O_DIRECT to bypass the block device 127 * cache which may be stale for multipath devices. An EINVAL errno 128 * indicates O_DIRECT is unsupported so fallback to just O_RDONLY. 129 */ 130 fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC); 131 if ((fd < 0) && (errno == EINVAL)) 132 fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC); 133 if ((fd < 0) && (errno == EACCES)) 134 hdl->lpc_open_access_error = B_TRUE; 135 if (fd < 0) 136 return; 137 138 error = zpool_read_label(fd, &config, &num_labels); 139 if (error != 0) { 140 (void) close(fd); 141 return; 142 } 143 144 if (num_labels == 0) { 145 (void) close(fd); 146 nvlist_free(config); 147 return; 148 } 149 150 /* 151 * Check that the vdev is for the expected guid. Additional entries 152 * are speculatively added based on the paths stored in the labels. 153 * Entries with valid paths but incorrect guids must be removed. 154 */ 155 error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); 156 if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) { 157 (void) close(fd); 158 nvlist_free(config); 159 return; 160 } 161 162 (void) close(fd); 163 164 rn->rn_config = config; 165 rn->rn_num_labels = num_labels; 166 167 /* 168 * Add additional entries for paths described by this label. 169 */ 170 if (rn->rn_labelpaths) { 171 char *path = NULL; 172 char *devid = NULL; 173 char *env = NULL; 174 rdsk_node_t *slice; 175 avl_index_t where; 176 int timeout; 177 int error; 178 179 if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) 180 return; 181 182 env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS"); 183 if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 || 184 timeout < 0) { 185 timeout = DISK_LABEL_WAIT; 186 } 187 188 /* 189 * Allow devlinks to stabilize so all paths are available. 190 */ 191 zpool_label_disk_wait(rn->rn_name, timeout); 192 193 if (path != NULL) { 194 slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); 195 slice->rn_name = zutil_strdup(hdl, path); 196 slice->rn_vdev_guid = vdev_guid; 197 slice->rn_avl = rn->rn_avl; 198 slice->rn_hdl = hdl; 199 slice->rn_order = IMPORT_ORDER_PREFERRED_1; 200 slice->rn_labelpaths = B_FALSE; 201 pthread_mutex_lock(rn->rn_lock); 202 if (avl_find(rn->rn_avl, slice, &where)) { 203 pthread_mutex_unlock(rn->rn_lock); 204 free(slice->rn_name); 205 free(slice); 206 } else { 207 avl_insert(rn->rn_avl, slice, where); 208 pthread_mutex_unlock(rn->rn_lock); 209 zpool_open_func(slice); 210 } 211 } 212 213 if (devid != NULL) { 214 slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); 215 error = asprintf(&slice->rn_name, "%s%s", 216 DEV_BYID_PATH, devid); 217 if (error == -1) { 218 free(slice); 219 return; 220 } 221 222 slice->rn_vdev_guid = vdev_guid; 223 slice->rn_avl = rn->rn_avl; 224 slice->rn_hdl = hdl; 225 slice->rn_order = IMPORT_ORDER_PREFERRED_2; 226 slice->rn_labelpaths = B_FALSE; 227 pthread_mutex_lock(rn->rn_lock); 228 if (avl_find(rn->rn_avl, slice, &where)) { 229 pthread_mutex_unlock(rn->rn_lock); 230 free(slice->rn_name); 231 free(slice); 232 } else { 233 avl_insert(rn->rn_avl, slice, where); 234 pthread_mutex_unlock(rn->rn_lock); 235 zpool_open_func(slice); 236 } 237 } 238 } 239 } 240 241 static const char * const 242 zpool_default_import_path[] = { 243 "/dev/disk/by-vdev", /* Custom rules, use first if they exist */ 244 "/dev/mapper", /* Use multipath devices before components */ 245 "/dev/disk/by-partlabel", /* Single unique entry set by user */ 246 "/dev/disk/by-partuuid", /* Generated partition uuid */ 247 "/dev/disk/by-label", /* Custom persistent labels */ 248 "/dev/disk/by-uuid", /* Single unique entry and persistent */ 249 "/dev/disk/by-id", /* May be multiple entries and persistent */ 250 "/dev/disk/by-path", /* Encodes physical location and persistent */ 251 "/dev" /* UNSAFE device names will change */ 252 }; 253 254 const char * const * 255 zpool_default_search_paths(size_t *count) 256 { 257 *count = ARRAY_SIZE(zpool_default_import_path); 258 return (zpool_default_import_path); 259 } 260 261 /* 262 * Given a full path to a device determine if that device appears in the 263 * import search path. If it does return the first match and store the 264 * index in the passed 'order' variable, otherwise return an error. 265 */ 266 static int 267 zfs_path_order(char *name, int *order) 268 { 269 int i, error = ENOENT; 270 char *dir, *env, *envdup, *tmp = NULL; 271 272 env = getenv("ZPOOL_IMPORT_PATH"); 273 if (env) { 274 envdup = strdup(env); 275 for (dir = strtok_r(envdup, ":", &tmp), i = 0; 276 dir != NULL; 277 dir = strtok_r(NULL, ":", &tmp), i++) { 278 if (strncmp(name, dir, strlen(dir)) == 0) { 279 *order = i; 280 error = 0; 281 break; 282 } 283 } 284 free(envdup); 285 } else { 286 for (i = 0; i < ARRAY_SIZE(zpool_default_import_path); i++) { 287 if (strncmp(name, zpool_default_import_path[i], 288 strlen(zpool_default_import_path[i])) == 0) { 289 *order = i; 290 error = 0; 291 break; 292 } 293 } 294 } 295 296 return (error); 297 } 298 299 /* 300 * Use libblkid to quickly enumerate all known zfs devices. 301 */ 302 int 303 zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock, 304 avl_tree_t **slice_cache) 305 { 306 rdsk_node_t *slice; 307 blkid_cache cache; 308 blkid_dev_iterate iter; 309 blkid_dev dev; 310 avl_index_t where; 311 int error; 312 313 *slice_cache = NULL; 314 315 error = blkid_get_cache(&cache, NULL); 316 if (error != 0) 317 return (error); 318 319 error = blkid_probe_all_new(cache); 320 if (error != 0) { 321 blkid_put_cache(cache); 322 return (error); 323 } 324 325 iter = blkid_dev_iterate_begin(cache); 326 if (iter == NULL) { 327 blkid_put_cache(cache); 328 return (EINVAL); 329 } 330 331 error = blkid_dev_set_search(iter, "TYPE", "zfs_member"); 332 if (error != 0) { 333 blkid_dev_iterate_end(iter); 334 blkid_put_cache(cache); 335 return (error); 336 } 337 338 *slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t)); 339 avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t), 340 offsetof(rdsk_node_t, rn_node)); 341 342 while (blkid_dev_next(iter, &dev) == 0) { 343 slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); 344 slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev)); 345 slice->rn_vdev_guid = 0; 346 slice->rn_lock = lock; 347 slice->rn_avl = *slice_cache; 348 slice->rn_hdl = hdl; 349 slice->rn_labelpaths = B_TRUE; 350 351 error = zfs_path_order(slice->rn_name, &slice->rn_order); 352 if (error == 0) 353 slice->rn_order += IMPORT_ORDER_SCAN_OFFSET; 354 else 355 slice->rn_order = IMPORT_ORDER_DEFAULT; 356 357 pthread_mutex_lock(lock); 358 if (avl_find(*slice_cache, slice, &where)) { 359 free(slice->rn_name); 360 free(slice); 361 } else { 362 avl_insert(*slice_cache, slice, where); 363 } 364 pthread_mutex_unlock(lock); 365 } 366 367 blkid_dev_iterate_end(iter); 368 blkid_put_cache(cache); 369 370 return (0); 371 } 372 373 /* 374 * Linux persistent device strings for vdev labels 375 * 376 * based on libudev for consistency with libudev disk add/remove events 377 */ 378 379 typedef struct vdev_dev_strs { 380 char vds_devid[128]; 381 char vds_devphys[128]; 382 } vdev_dev_strs_t; 383 384 #ifdef HAVE_LIBUDEV 385 386 /* 387 * Obtain the persistent device id string (describes what) 388 * 389 * used by ZED vdev matching for auto-{online,expand,replace} 390 */ 391 int 392 zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) 393 { 394 struct udev_list_entry *entry; 395 const char *bus; 396 char devbyid[MAXPATHLEN]; 397 398 /* The bus based by-id path is preferred */ 399 bus = udev_device_get_property_value(dev, "ID_BUS"); 400 401 if (bus == NULL) { 402 const char *dm_uuid; 403 404 /* 405 * For multipath nodes use the persistent uuid based identifier 406 * 407 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f 408 */ 409 dm_uuid = udev_device_get_property_value(dev, "DM_UUID"); 410 if (dm_uuid != NULL) { 411 (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid); 412 return (0); 413 } 414 415 /* 416 * For volumes use the persistent /dev/zvol/dataset identifier 417 */ 418 entry = udev_device_get_devlinks_list_entry(dev); 419 while (entry != NULL) { 420 const char *name; 421 422 name = udev_list_entry_get_name(entry); 423 if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { 424 (void) strlcpy(bufptr, name, buflen); 425 return (0); 426 } 427 entry = udev_list_entry_get_next(entry); 428 } 429 430 /* 431 * NVME 'by-id' symlinks are similar to bus case 432 */ 433 struct udev_device *parent; 434 435 parent = udev_device_get_parent_with_subsystem_devtype(dev, 436 "nvme", NULL); 437 if (parent != NULL) 438 bus = "nvme"; /* continue with bus symlink search */ 439 else 440 return (ENODATA); 441 } 442 443 /* 444 * locate the bus specific by-id link 445 */ 446 (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus); 447 entry = udev_device_get_devlinks_list_entry(dev); 448 while (entry != NULL) { 449 const char *name; 450 451 name = udev_list_entry_get_name(entry); 452 if (strncmp(name, devbyid, strlen(devbyid)) == 0) { 453 name += strlen(DEV_BYID_PATH); 454 (void) strlcpy(bufptr, name, buflen); 455 return (0); 456 } 457 entry = udev_list_entry_get_next(entry); 458 } 459 460 return (ENODATA); 461 } 462 463 /* 464 * Obtain the persistent physical location string (describes where) 465 * 466 * used by ZED vdev matching for auto-{online,expand,replace} 467 */ 468 int 469 zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) 470 { 471 const char *physpath = NULL; 472 struct udev_list_entry *entry; 473 474 /* 475 * Normal disks use ID_PATH for their physical path. 476 */ 477 physpath = udev_device_get_property_value(dev, "ID_PATH"); 478 if (physpath != NULL && strlen(physpath) > 0) { 479 (void) strlcpy(bufptr, physpath, buflen); 480 return (0); 481 } 482 483 /* 484 * Device mapper devices are virtual and don't have a physical 485 * path. For them we use ID_VDEV instead, which is setup via the 486 * /etc/vdev_id.conf file. ID_VDEV provides a persistent path 487 * to a virtual device. If you don't have vdev_id.conf setup, 488 * you cannot use multipath autoreplace with device mapper. 489 */ 490 physpath = udev_device_get_property_value(dev, "ID_VDEV"); 491 if (physpath != NULL && strlen(physpath) > 0) { 492 (void) strlcpy(bufptr, physpath, buflen); 493 return (0); 494 } 495 496 /* 497 * For ZFS volumes use the persistent /dev/zvol/dataset identifier 498 */ 499 entry = udev_device_get_devlinks_list_entry(dev); 500 while (entry != NULL) { 501 physpath = udev_list_entry_get_name(entry); 502 if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { 503 (void) strlcpy(bufptr, physpath, buflen); 504 return (0); 505 } 506 entry = udev_list_entry_get_next(entry); 507 } 508 509 /* 510 * For all other devices fallback to using the by-uuid name. 511 */ 512 entry = udev_device_get_devlinks_list_entry(dev); 513 while (entry != NULL) { 514 physpath = udev_list_entry_get_name(entry); 515 if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) { 516 (void) strlcpy(bufptr, physpath, buflen); 517 return (0); 518 } 519 entry = udev_list_entry_get_next(entry); 520 } 521 522 return (ENODATA); 523 } 524 525 /* 526 * A disk is considered a multipath whole disk when: 527 * DEVNAME key value has "dm-" 528 * DM_NAME key value has "mpath" prefix 529 * DM_UUID key exists 530 * ID_PART_TABLE_TYPE key does not exist or is not gpt 531 */ 532 static boolean_t 533 udev_mpath_whole_disk(struct udev_device *dev) 534 { 535 const char *devname, *type, *uuid; 536 537 devname = udev_device_get_property_value(dev, "DEVNAME"); 538 type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE"); 539 uuid = udev_device_get_property_value(dev, "DM_UUID"); 540 541 if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) && 542 ((type == NULL) || (strcmp(type, "gpt") != 0)) && 543 (uuid != NULL)) { 544 return (B_TRUE); 545 } 546 547 return (B_FALSE); 548 } 549 550 static int 551 udev_device_is_ready(struct udev_device *dev) 552 { 553 #ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED 554 return (udev_device_get_is_initialized(dev)); 555 #else 556 /* wait for DEVLINKS property to be initialized */ 557 return (udev_device_get_property_value(dev, "DEVLINKS") != NULL); 558 #endif 559 } 560 561 #else 562 563 int 564 zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) 565 { 566 (void) dev, (void) bufptr, (void) buflen; 567 return (ENODATA); 568 } 569 570 int 571 zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) 572 { 573 (void) dev, (void) bufptr, (void) buflen; 574 return (ENODATA); 575 } 576 577 #endif /* HAVE_LIBUDEV */ 578 579 /* 580 * Wait up to timeout_ms for udev to set up the device node. The device is 581 * considered ready when libudev determines it has been initialized, all of 582 * the device links have been verified to exist, and it has been allowed to 583 * settle. At this point the device the device can be accessed reliably. 584 * Depending on the complexity of the udev rules this process could take 585 * several seconds. 586 */ 587 int 588 zpool_label_disk_wait(const char *path, int timeout_ms) 589 { 590 #ifdef HAVE_LIBUDEV 591 struct udev *udev; 592 struct udev_device *dev = NULL; 593 char nodepath[MAXPATHLEN]; 594 char *sysname = NULL; 595 int ret = ENODEV; 596 int settle_ms = 50; 597 long sleep_ms = 10; 598 hrtime_t start, settle; 599 600 if ((udev = udev_new()) == NULL) 601 return (ENXIO); 602 603 start = gethrtime(); 604 settle = 0; 605 606 do { 607 if (sysname == NULL) { 608 if (realpath(path, nodepath) != NULL) { 609 sysname = strrchr(nodepath, '/') + 1; 610 } else { 611 (void) usleep(sleep_ms * MILLISEC); 612 continue; 613 } 614 } 615 616 dev = udev_device_new_from_subsystem_sysname(udev, 617 "block", sysname); 618 if ((dev != NULL) && udev_device_is_ready(dev)) { 619 struct udev_list_entry *links, *link = NULL; 620 621 ret = 0; 622 links = udev_device_get_devlinks_list_entry(dev); 623 624 udev_list_entry_foreach(link, links) { 625 struct stat64 statbuf; 626 const char *name; 627 628 name = udev_list_entry_get_name(link); 629 errno = 0; 630 if (stat64(name, &statbuf) == 0 && errno == 0) 631 continue; 632 633 settle = 0; 634 ret = ENODEV; 635 break; 636 } 637 638 if (ret == 0) { 639 if (settle == 0) { 640 settle = gethrtime(); 641 } else if (NSEC2MSEC(gethrtime() - settle) >= 642 settle_ms) { 643 udev_device_unref(dev); 644 break; 645 } 646 } 647 } 648 649 udev_device_unref(dev); 650 (void) usleep(sleep_ms * MILLISEC); 651 652 } while (NSEC2MSEC(gethrtime() - start) < timeout_ms); 653 654 udev_unref(udev); 655 656 return (ret); 657 #else 658 int settle_ms = 50; 659 long sleep_ms = 10; 660 hrtime_t start, settle; 661 struct stat64 statbuf; 662 663 start = gethrtime(); 664 settle = 0; 665 666 do { 667 errno = 0; 668 if ((stat64(path, &statbuf) == 0) && (errno == 0)) { 669 if (settle == 0) 670 settle = gethrtime(); 671 else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms) 672 return (0); 673 } else if (errno != ENOENT) { 674 return (errno); 675 } 676 677 usleep(sleep_ms * MILLISEC); 678 } while (NSEC2MSEC(gethrtime() - start) < timeout_ms); 679 680 return (ENODEV); 681 #endif /* HAVE_LIBUDEV */ 682 } 683 684 /* 685 * Encode the persistent devices strings 686 * used for the vdev disk label 687 */ 688 static int 689 encode_device_strings(const char *path, vdev_dev_strs_t *ds, 690 boolean_t wholedisk) 691 { 692 #ifdef HAVE_LIBUDEV 693 struct udev *udev; 694 struct udev_device *dev = NULL; 695 char nodepath[MAXPATHLEN]; 696 char *sysname; 697 int ret = ENODEV; 698 hrtime_t start; 699 700 if ((udev = udev_new()) == NULL) 701 return (ENXIO); 702 703 /* resolve path to a runtime device node instance */ 704 if (realpath(path, nodepath) == NULL) 705 goto no_dev; 706 707 sysname = strrchr(nodepath, '/') + 1; 708 709 /* 710 * Wait up to 3 seconds for udev to set up the device node context 711 */ 712 start = gethrtime(); 713 do { 714 dev = udev_device_new_from_subsystem_sysname(udev, "block", 715 sysname); 716 if (dev == NULL) 717 goto no_dev; 718 if (udev_device_is_ready(dev)) 719 break; /* udev ready */ 720 721 udev_device_unref(dev); 722 dev = NULL; 723 724 if (NSEC2MSEC(gethrtime() - start) < 10) 725 (void) sched_yield(); /* yield/busy wait up to 10ms */ 726 else 727 (void) usleep(10 * MILLISEC); 728 729 } while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC)); 730 731 if (dev == NULL) 732 goto no_dev; 733 734 /* 735 * Only whole disks require extra device strings 736 */ 737 if (!wholedisk && !udev_mpath_whole_disk(dev)) 738 goto no_dev; 739 740 ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid)); 741 if (ret != 0) 742 goto no_dev_ref; 743 744 /* physical location string (optional) */ 745 if (zfs_device_get_physical(dev, ds->vds_devphys, 746 sizeof (ds->vds_devphys)) != 0) { 747 ds->vds_devphys[0] = '\0'; /* empty string --> not available */ 748 } 749 750 no_dev_ref: 751 udev_device_unref(dev); 752 no_dev: 753 udev_unref(udev); 754 755 return (ret); 756 #else 757 (void) path; 758 (void) ds; 759 (void) wholedisk; 760 return (ENOENT); 761 #endif 762 } 763 764 /* 765 * Rescan the enclosure sysfs path for turning on enclosure LEDs and store it 766 * in the nvlist * (if applicable). Like: 767 * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' 768 */ 769 static void 770 update_vdev_config_dev_sysfs_path(nvlist_t *nv, char *path) 771 { 772 char *upath, *spath; 773 774 /* Add enclosure sysfs path (if disk is in an enclosure). */ 775 upath = zfs_get_underlying_path(path); 776 spath = zfs_get_enclosure_sysfs_path(upath); 777 778 if (spath) { 779 nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, spath); 780 } else { 781 nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); 782 } 783 784 free(upath); 785 free(spath); 786 } 787 788 /* 789 * This will get called for each leaf vdev. 790 */ 791 static int 792 sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data) 793 { 794 (void) hdl_data, (void) data; 795 796 char *path = NULL; 797 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) 798 return (1); 799 800 /* Rescan our enclosure sysfs path for this vdev */ 801 update_vdev_config_dev_sysfs_path(nv, path); 802 return (0); 803 } 804 805 /* 806 * Given an nvlist for our pool (with vdev tree), iterate over all the 807 * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH. 808 */ 809 void 810 update_vdevs_config_dev_sysfs_path(nvlist_t *config) 811 { 812 nvlist_t *nvroot = NULL; 813 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 814 &nvroot) == 0); 815 for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL); 816 } 817 818 /* 819 * Update a leaf vdev's persistent device strings 820 * 821 * - only applies for a dedicated leaf vdev (aka whole disk) 822 * - updated during pool create|add|attach|import 823 * - used for matching device matching during auto-{online,expand,replace} 824 * - stored in a leaf disk config label (i.e. alongside 'path' NVP) 825 * - these strings are currently not used in kernel (i.e. for vdev_disk_open) 826 * 827 * single device node example: 828 * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1' 829 * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0' 830 * 831 * multipath device node example: 832 * devid: 'dm-uuid-mpath-35000c5006304de3f' 833 * 834 * We also store the enclosure sysfs path for turning on enclosure LEDs 835 * (if applicable): 836 * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' 837 */ 838 void 839 update_vdev_config_dev_strs(nvlist_t *nv) 840 { 841 vdev_dev_strs_t vds; 842 char *env, *type, *path; 843 uint64_t wholedisk = 0; 844 845 /* 846 * For the benefit of legacy ZFS implementations, allow 847 * for opting out of devid strings in the vdev label. 848 * 849 * example use: 850 * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer 851 * 852 * explanation: 853 * Older OpenZFS implementations had issues when attempting to 854 * display pool config VDEV names if a "devid" NVP value is 855 * present in the pool's config. 856 * 857 * For example, a pool that originated on illumos platform would 858 * have a devid value in the config and "zpool status" would fail 859 * when listing the config. 860 * 861 * A pool can be stripped of any "devid" values on import or 862 * prevented from adding them on zpool create|add by setting 863 * ZFS_VDEV_DEVID_OPT_OUT. 864 */ 865 env = getenv("ZFS_VDEV_DEVID_OPT_OUT"); 866 if (env && (strtoul(env, NULL, 0) > 0 || 867 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) { 868 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); 869 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); 870 return; 871 } 872 873 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 || 874 strcmp(type, VDEV_TYPE_DISK) != 0) { 875 return; 876 } 877 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) 878 return; 879 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); 880 881 /* 882 * Update device string values in the config nvlist. 883 */ 884 if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) { 885 (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid); 886 if (vds.vds_devphys[0] != '\0') { 887 (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, 888 vds.vds_devphys); 889 } 890 update_vdev_config_dev_sysfs_path(nv, path); 891 } else { 892 /* Clear out any stale entries. */ 893 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); 894 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); 895 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); 896 } 897 } 898