1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 25 * Copyright 2015 RackTop Systems. 26 * Copyright (c) 2016, Intel Corporation. 27 */ 28 29 /* 30 * Pool import support functions. 31 * 32 * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since 33 * these commands are expected to run in the global zone, we can assume 34 * that the devices are all readable when called. 35 * 36 * To import a pool, we rely on reading the configuration information from the 37 * ZFS label of each device. If we successfully read the label, then we 38 * organize the configuration information in the following hierarchy: 39 * 40 * pool guid -> toplevel vdev guid -> label txg 41 * 42 * Duplicate entries matching this same tuple will be discarded. Once we have 43 * examined every device, we pick the best label txg config for each toplevel 44 * vdev. We then arrange these toplevel vdevs into a complete pool config, and 45 * update any paths that have changed. Finally, we attempt to import the pool 46 * using our derived config, and record the results. 47 */ 48 49 #include <ctype.h> 50 #include <dirent.h> 51 #include <errno.h> 52 #include <libintl.h> 53 #include <libgen.h> 54 #include <stddef.h> 55 #include <stdlib.h> 56 #include <stdio.h> 57 #include <string.h> 58 #include <sys/stat.h> 59 #include <unistd.h> 60 #include <fcntl.h> 61 #include <sys/dktp/fdisk.h> 62 #include <sys/vdev_impl.h> 63 #include <sys/fs/zfs.h> 64 65 #include <thread_pool.h> 66 #include <libzutil.h> 67 #include <libnvpair.h> 68 #include <libzfs.h> 69 70 #include "zutil_import.h" 71 72 #ifdef HAVE_LIBUDEV 73 #include <libudev.h> 74 #include <sched.h> 75 #endif 76 #include <blkid/blkid.h> 77 78 #define DEV_BYID_PATH "/dev/disk/by-id/" 79 80 /* 81 * Skip devices with well known prefixes: 82 * there can be side effects when opening devices which need to be avoided. 83 * 84 * hpet - High Precision Event Timer 85 * watchdog[N] - Watchdog must be closed in a special way. 86 */ 87 static boolean_t 88 should_skip_dev(const char *dev) 89 { 90 return ((strcmp(dev, "watchdog") == 0) || 91 (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) || 92 (strcmp(dev, "hpet") == 0)); 93 } 94 95 int 96 zfs_dev_flush(int fd) 97 { 98 return (ioctl(fd, BLKFLSBUF)); 99 } 100 101 void 102 zpool_open_func(void *arg) 103 { 104 rdsk_node_t *rn = arg; 105 libpc_handle_t *hdl = rn->rn_hdl; 106 struct stat64 statbuf; 107 nvlist_t *config; 108 uint64_t vdev_guid = 0; 109 int error; 110 int num_labels = 0; 111 int fd; 112 113 if (should_skip_dev(zfs_basename(rn->rn_name))) 114 return; 115 116 /* 117 * Ignore failed stats. We only want regular files and block devices. 118 * Ignore files that are too small to hold a zpool. 119 */ 120 if (stat64(rn->rn_name, &statbuf) != 0 || 121 (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) || 122 (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE)) 123 return; 124 125 /* 126 * Preferentially open using O_DIRECT to bypass the block device 127 * cache which may be stale for multipath devices. An EINVAL errno 128 * indicates O_DIRECT is unsupported so fallback to just O_RDONLY. 129 */ 130 fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC); 131 if ((fd < 0) && (errno == EINVAL)) 132 fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC); 133 if ((fd < 0) && (errno == EACCES)) 134 hdl->lpc_open_access_error = B_TRUE; 135 if (fd < 0) 136 return; 137 138 error = zpool_read_label(fd, &config, &num_labels); 139 if (error != 0) { 140 (void) close(fd); 141 return; 142 } 143 144 if (num_labels == 0) { 145 (void) close(fd); 146 nvlist_free(config); 147 return; 148 } 149 150 /* 151 * Check that the vdev is for the expected guid. Additional entries 152 * are speculatively added based on the paths stored in the labels. 153 * Entries with valid paths but incorrect guids must be removed. 154 */ 155 error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); 156 if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) { 157 (void) close(fd); 158 nvlist_free(config); 159 return; 160 } 161 162 (void) close(fd); 163 164 rn->rn_config = config; 165 rn->rn_num_labels = num_labels; 166 167 /* 168 * Add additional entries for paths described by this label. 169 */ 170 if (rn->rn_labelpaths) { 171 char *path = NULL; 172 char *devid = NULL; 173 char *env = NULL; 174 rdsk_node_t *slice; 175 avl_index_t where; 176 int timeout; 177 int error; 178 179 if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) 180 return; 181 182 env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS"); 183 if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 || 184 timeout < 0) { 185 timeout = DISK_LABEL_WAIT; 186 } 187 188 /* 189 * Allow devlinks to stabilize so all paths are available. 190 */ 191 zpool_label_disk_wait(rn->rn_name, timeout); 192 193 if (path != NULL) { 194 slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); 195 slice->rn_name = zutil_strdup(hdl, path); 196 slice->rn_vdev_guid = vdev_guid; 197 slice->rn_avl = rn->rn_avl; 198 slice->rn_hdl = hdl; 199 slice->rn_order = IMPORT_ORDER_PREFERRED_1; 200 slice->rn_labelpaths = B_FALSE; 201 pthread_mutex_lock(rn->rn_lock); 202 if (avl_find(rn->rn_avl, slice, &where)) { 203 pthread_mutex_unlock(rn->rn_lock); 204 free(slice->rn_name); 205 free(slice); 206 } else { 207 avl_insert(rn->rn_avl, slice, where); 208 pthread_mutex_unlock(rn->rn_lock); 209 zpool_open_func(slice); 210 } 211 } 212 213 if (devid != NULL) { 214 slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); 215 error = asprintf(&slice->rn_name, "%s%s", 216 DEV_BYID_PATH, devid); 217 if (error == -1) { 218 free(slice); 219 return; 220 } 221 222 slice->rn_vdev_guid = vdev_guid; 223 slice->rn_avl = rn->rn_avl; 224 slice->rn_hdl = hdl; 225 slice->rn_order = IMPORT_ORDER_PREFERRED_2; 226 slice->rn_labelpaths = B_FALSE; 227 pthread_mutex_lock(rn->rn_lock); 228 if (avl_find(rn->rn_avl, slice, &where)) { 229 pthread_mutex_unlock(rn->rn_lock); 230 free(slice->rn_name); 231 free(slice); 232 } else { 233 avl_insert(rn->rn_avl, slice, where); 234 pthread_mutex_unlock(rn->rn_lock); 235 zpool_open_func(slice); 236 } 237 } 238 } 239 } 240 241 static const char * const 242 zpool_default_import_path[] = { 243 "/dev/disk/by-vdev", /* Custom rules, use first if they exist */ 244 "/dev/mapper", /* Use multipath devices before components */ 245 "/dev/disk/by-partlabel", /* Single unique entry set by user */ 246 "/dev/disk/by-partuuid", /* Generated partition uuid */ 247 "/dev/disk/by-label", /* Custom persistent labels */ 248 "/dev/disk/by-uuid", /* Single unique entry and persistent */ 249 "/dev/disk/by-id", /* May be multiple entries and persistent */ 250 "/dev/disk/by-path", /* Encodes physical location and persistent */ 251 "/dev" /* UNSAFE device names will change */ 252 }; 253 254 const char * const * 255 zpool_default_search_paths(size_t *count) 256 { 257 *count = ARRAY_SIZE(zpool_default_import_path); 258 return (zpool_default_import_path); 259 } 260 261 /* 262 * Given a full path to a device determine if that device appears in the 263 * import search path. If it does return the first match and store the 264 * index in the passed 'order' variable, otherwise return an error. 265 */ 266 static int 267 zfs_path_order(const char *name, int *order) 268 { 269 const char *env = getenv("ZPOOL_IMPORT_PATH"); 270 271 if (env) { 272 for (int i = 0; ; ++i) { 273 env += strspn(env, ":"); 274 size_t dirlen = strcspn(env, ":"); 275 if (dirlen) { 276 if (strncmp(name, env, dirlen) == 0) { 277 *order = i; 278 return (0); 279 } 280 281 env += dirlen; 282 } else 283 break; 284 } 285 } else { 286 for (int i = 0; i < ARRAY_SIZE(zpool_default_import_path); 287 ++i) { 288 if (strncmp(name, zpool_default_import_path[i], 289 strlen(zpool_default_import_path[i])) == 0) { 290 *order = i; 291 return (0); 292 } 293 } 294 } 295 296 return (ENOENT); 297 } 298 299 /* 300 * Use libblkid to quickly enumerate all known zfs devices. 301 */ 302 int 303 zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock, 304 avl_tree_t **slice_cache) 305 { 306 rdsk_node_t *slice; 307 blkid_cache cache; 308 blkid_dev_iterate iter; 309 blkid_dev dev; 310 avl_index_t where; 311 int error; 312 313 *slice_cache = NULL; 314 315 error = blkid_get_cache(&cache, NULL); 316 if (error != 0) 317 return (error); 318 319 error = blkid_probe_all_new(cache); 320 if (error != 0) { 321 blkid_put_cache(cache); 322 return (error); 323 } 324 325 iter = blkid_dev_iterate_begin(cache); 326 if (iter == NULL) { 327 blkid_put_cache(cache); 328 return (EINVAL); 329 } 330 331 /* Only const char *s since 2.32 */ 332 error = blkid_dev_set_search(iter, 333 (char *)"TYPE", (char *)"zfs_member"); 334 if (error != 0) { 335 blkid_dev_iterate_end(iter); 336 blkid_put_cache(cache); 337 return (error); 338 } 339 340 *slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t)); 341 avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t), 342 offsetof(rdsk_node_t, rn_node)); 343 344 while (blkid_dev_next(iter, &dev) == 0) { 345 slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); 346 slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev)); 347 slice->rn_vdev_guid = 0; 348 slice->rn_lock = lock; 349 slice->rn_avl = *slice_cache; 350 slice->rn_hdl = hdl; 351 slice->rn_labelpaths = B_TRUE; 352 353 error = zfs_path_order(slice->rn_name, &slice->rn_order); 354 if (error == 0) 355 slice->rn_order += IMPORT_ORDER_SCAN_OFFSET; 356 else 357 slice->rn_order = IMPORT_ORDER_DEFAULT; 358 359 pthread_mutex_lock(lock); 360 if (avl_find(*slice_cache, slice, &where)) { 361 free(slice->rn_name); 362 free(slice); 363 } else { 364 avl_insert(*slice_cache, slice, where); 365 } 366 pthread_mutex_unlock(lock); 367 } 368 369 blkid_dev_iterate_end(iter); 370 blkid_put_cache(cache); 371 372 return (0); 373 } 374 375 /* 376 * Linux persistent device strings for vdev labels 377 * 378 * based on libudev for consistency with libudev disk add/remove events 379 */ 380 381 typedef struct vdev_dev_strs { 382 char vds_devid[128]; 383 char vds_devphys[128]; 384 } vdev_dev_strs_t; 385 386 #ifdef HAVE_LIBUDEV 387 388 /* 389 * Obtain the persistent device id string (describes what) 390 * 391 * used by ZED vdev matching for auto-{online,expand,replace} 392 */ 393 int 394 zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) 395 { 396 struct udev_list_entry *entry; 397 const char *bus; 398 char devbyid[MAXPATHLEN]; 399 400 /* The bus based by-id path is preferred */ 401 bus = udev_device_get_property_value(dev, "ID_BUS"); 402 403 if (bus == NULL) { 404 const char *dm_uuid; 405 406 /* 407 * For multipath nodes use the persistent uuid based identifier 408 * 409 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f 410 */ 411 dm_uuid = udev_device_get_property_value(dev, "DM_UUID"); 412 if (dm_uuid != NULL) { 413 (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid); 414 return (0); 415 } 416 417 /* 418 * For volumes use the persistent /dev/zvol/dataset identifier 419 */ 420 entry = udev_device_get_devlinks_list_entry(dev); 421 while (entry != NULL) { 422 const char *name; 423 424 name = udev_list_entry_get_name(entry); 425 if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { 426 (void) strlcpy(bufptr, name, buflen); 427 return (0); 428 } 429 entry = udev_list_entry_get_next(entry); 430 } 431 432 /* 433 * NVME 'by-id' symlinks are similar to bus case 434 */ 435 struct udev_device *parent; 436 437 parent = udev_device_get_parent_with_subsystem_devtype(dev, 438 "nvme", NULL); 439 if (parent != NULL) 440 bus = "nvme"; /* continue with bus symlink search */ 441 else 442 return (ENODATA); 443 } 444 445 /* 446 * locate the bus specific by-id link 447 */ 448 (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus); 449 entry = udev_device_get_devlinks_list_entry(dev); 450 while (entry != NULL) { 451 const char *name; 452 453 name = udev_list_entry_get_name(entry); 454 if (strncmp(name, devbyid, strlen(devbyid)) == 0) { 455 name += strlen(DEV_BYID_PATH); 456 (void) strlcpy(bufptr, name, buflen); 457 return (0); 458 } 459 entry = udev_list_entry_get_next(entry); 460 } 461 462 return (ENODATA); 463 } 464 465 /* 466 * Obtain the persistent physical location string (describes where) 467 * 468 * used by ZED vdev matching for auto-{online,expand,replace} 469 */ 470 int 471 zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) 472 { 473 const char *physpath = NULL; 474 struct udev_list_entry *entry; 475 476 /* 477 * Normal disks use ID_PATH for their physical path. 478 */ 479 physpath = udev_device_get_property_value(dev, "ID_PATH"); 480 if (physpath != NULL && strlen(physpath) > 0) { 481 (void) strlcpy(bufptr, physpath, buflen); 482 return (0); 483 } 484 485 /* 486 * Device mapper devices are virtual and don't have a physical 487 * path. For them we use ID_VDEV instead, which is setup via the 488 * /etc/vdev_id.conf file. ID_VDEV provides a persistent path 489 * to a virtual device. If you don't have vdev_id.conf setup, 490 * you cannot use multipath autoreplace with device mapper. 491 */ 492 physpath = udev_device_get_property_value(dev, "ID_VDEV"); 493 if (physpath != NULL && strlen(physpath) > 0) { 494 (void) strlcpy(bufptr, physpath, buflen); 495 return (0); 496 } 497 498 /* 499 * For ZFS volumes use the persistent /dev/zvol/dataset identifier 500 */ 501 entry = udev_device_get_devlinks_list_entry(dev); 502 while (entry != NULL) { 503 physpath = udev_list_entry_get_name(entry); 504 if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { 505 (void) strlcpy(bufptr, physpath, buflen); 506 return (0); 507 } 508 entry = udev_list_entry_get_next(entry); 509 } 510 511 /* 512 * For all other devices fallback to using the by-uuid name. 513 */ 514 entry = udev_device_get_devlinks_list_entry(dev); 515 while (entry != NULL) { 516 physpath = udev_list_entry_get_name(entry); 517 if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) { 518 (void) strlcpy(bufptr, physpath, buflen); 519 return (0); 520 } 521 entry = udev_list_entry_get_next(entry); 522 } 523 524 return (ENODATA); 525 } 526 527 /* 528 * A disk is considered a multipath whole disk when: 529 * DEVNAME key value has "dm-" 530 * DM_NAME key value has "mpath" prefix 531 * DM_UUID key exists 532 * ID_PART_TABLE_TYPE key does not exist or is not gpt 533 */ 534 static boolean_t 535 udev_mpath_whole_disk(struct udev_device *dev) 536 { 537 const char *devname, *type, *uuid; 538 539 devname = udev_device_get_property_value(dev, "DEVNAME"); 540 type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE"); 541 uuid = udev_device_get_property_value(dev, "DM_UUID"); 542 543 if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) && 544 ((type == NULL) || (strcmp(type, "gpt") != 0)) && 545 (uuid != NULL)) { 546 return (B_TRUE); 547 } 548 549 return (B_FALSE); 550 } 551 552 static int 553 udev_device_is_ready(struct udev_device *dev) 554 { 555 #ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED 556 return (udev_device_get_is_initialized(dev)); 557 #else 558 /* wait for DEVLINKS property to be initialized */ 559 return (udev_device_get_property_value(dev, "DEVLINKS") != NULL); 560 #endif 561 } 562 563 #else 564 565 int 566 zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) 567 { 568 (void) dev, (void) bufptr, (void) buflen; 569 return (ENODATA); 570 } 571 572 int 573 zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) 574 { 575 (void) dev, (void) bufptr, (void) buflen; 576 return (ENODATA); 577 } 578 579 #endif /* HAVE_LIBUDEV */ 580 581 /* 582 * Wait up to timeout_ms for udev to set up the device node. The device is 583 * considered ready when libudev determines it has been initialized, all of 584 * the device links have been verified to exist, and it has been allowed to 585 * settle. At this point the device the device can be accessed reliably. 586 * Depending on the complexity of the udev rules this process could take 587 * several seconds. 588 */ 589 int 590 zpool_label_disk_wait(const char *path, int timeout_ms) 591 { 592 #ifdef HAVE_LIBUDEV 593 struct udev *udev; 594 struct udev_device *dev = NULL; 595 char nodepath[MAXPATHLEN]; 596 char *sysname = NULL; 597 int ret = ENODEV; 598 int settle_ms = 50; 599 long sleep_ms = 10; 600 hrtime_t start, settle; 601 602 if ((udev = udev_new()) == NULL) 603 return (ENXIO); 604 605 start = gethrtime(); 606 settle = 0; 607 608 do { 609 if (sysname == NULL) { 610 if (realpath(path, nodepath) != NULL) { 611 sysname = strrchr(nodepath, '/') + 1; 612 } else { 613 (void) usleep(sleep_ms * MILLISEC); 614 continue; 615 } 616 } 617 618 dev = udev_device_new_from_subsystem_sysname(udev, 619 "block", sysname); 620 if ((dev != NULL) && udev_device_is_ready(dev)) { 621 struct udev_list_entry *links, *link = NULL; 622 623 ret = 0; 624 links = udev_device_get_devlinks_list_entry(dev); 625 626 udev_list_entry_foreach(link, links) { 627 struct stat64 statbuf; 628 const char *name; 629 630 name = udev_list_entry_get_name(link); 631 errno = 0; 632 if (stat64(name, &statbuf) == 0 && errno == 0) 633 continue; 634 635 settle = 0; 636 ret = ENODEV; 637 break; 638 } 639 640 if (ret == 0) { 641 if (settle == 0) { 642 settle = gethrtime(); 643 } else if (NSEC2MSEC(gethrtime() - settle) >= 644 settle_ms) { 645 udev_device_unref(dev); 646 break; 647 } 648 } 649 } 650 651 udev_device_unref(dev); 652 (void) usleep(sleep_ms * MILLISEC); 653 654 } while (NSEC2MSEC(gethrtime() - start) < timeout_ms); 655 656 udev_unref(udev); 657 658 return (ret); 659 #else 660 int settle_ms = 50; 661 long sleep_ms = 10; 662 hrtime_t start, settle; 663 struct stat64 statbuf; 664 665 start = gethrtime(); 666 settle = 0; 667 668 do { 669 errno = 0; 670 if ((stat64(path, &statbuf) == 0) && (errno == 0)) { 671 if (settle == 0) 672 settle = gethrtime(); 673 else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms) 674 return (0); 675 } else if (errno != ENOENT) { 676 return (errno); 677 } 678 679 usleep(sleep_ms * MILLISEC); 680 } while (NSEC2MSEC(gethrtime() - start) < timeout_ms); 681 682 return (ENODEV); 683 #endif /* HAVE_LIBUDEV */ 684 } 685 686 /* 687 * Encode the persistent devices strings 688 * used for the vdev disk label 689 */ 690 static int 691 encode_device_strings(const char *path, vdev_dev_strs_t *ds, 692 boolean_t wholedisk) 693 { 694 #ifdef HAVE_LIBUDEV 695 struct udev *udev; 696 struct udev_device *dev = NULL; 697 char nodepath[MAXPATHLEN]; 698 char *sysname; 699 int ret = ENODEV; 700 hrtime_t start; 701 702 if ((udev = udev_new()) == NULL) 703 return (ENXIO); 704 705 /* resolve path to a runtime device node instance */ 706 if (realpath(path, nodepath) == NULL) 707 goto no_dev; 708 709 sysname = strrchr(nodepath, '/') + 1; 710 711 /* 712 * Wait up to 3 seconds for udev to set up the device node context 713 */ 714 start = gethrtime(); 715 do { 716 dev = udev_device_new_from_subsystem_sysname(udev, "block", 717 sysname); 718 if (dev == NULL) 719 goto no_dev; 720 if (udev_device_is_ready(dev)) 721 break; /* udev ready */ 722 723 udev_device_unref(dev); 724 dev = NULL; 725 726 if (NSEC2MSEC(gethrtime() - start) < 10) 727 (void) sched_yield(); /* yield/busy wait up to 10ms */ 728 else 729 (void) usleep(10 * MILLISEC); 730 731 } while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC)); 732 733 if (dev == NULL) 734 goto no_dev; 735 736 /* 737 * Only whole disks require extra device strings 738 */ 739 if (!wholedisk && !udev_mpath_whole_disk(dev)) 740 goto no_dev; 741 742 ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid)); 743 if (ret != 0) 744 goto no_dev_ref; 745 746 /* physical location string (optional) */ 747 if (zfs_device_get_physical(dev, ds->vds_devphys, 748 sizeof (ds->vds_devphys)) != 0) { 749 ds->vds_devphys[0] = '\0'; /* empty string --> not available */ 750 } 751 752 no_dev_ref: 753 udev_device_unref(dev); 754 no_dev: 755 udev_unref(udev); 756 757 return (ret); 758 #else 759 (void) path; 760 (void) ds; 761 (void) wholedisk; 762 return (ENOENT); 763 #endif 764 } 765 766 /* 767 * Rescan the enclosure sysfs path for turning on enclosure LEDs and store it 768 * in the nvlist * (if applicable). Like: 769 * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' 770 */ 771 static void 772 update_vdev_config_dev_sysfs_path(nvlist_t *nv, char *path) 773 { 774 char *upath, *spath; 775 776 /* Add enclosure sysfs path (if disk is in an enclosure). */ 777 upath = zfs_get_underlying_path(path); 778 spath = zfs_get_enclosure_sysfs_path(upath); 779 780 if (spath) { 781 nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, spath); 782 } else { 783 nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); 784 } 785 786 free(upath); 787 free(spath); 788 } 789 790 /* 791 * This will get called for each leaf vdev. 792 */ 793 static int 794 sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data) 795 { 796 (void) hdl_data, (void) data; 797 798 char *path = NULL; 799 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) 800 return (1); 801 802 /* Rescan our enclosure sysfs path for this vdev */ 803 update_vdev_config_dev_sysfs_path(nv, path); 804 return (0); 805 } 806 807 /* 808 * Given an nvlist for our pool (with vdev tree), iterate over all the 809 * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH. 810 */ 811 void 812 update_vdevs_config_dev_sysfs_path(nvlist_t *config) 813 { 814 nvlist_t *nvroot = NULL; 815 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 816 &nvroot) == 0); 817 for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL); 818 } 819 820 /* 821 * Update a leaf vdev's persistent device strings 822 * 823 * - only applies for a dedicated leaf vdev (aka whole disk) 824 * - updated during pool create|add|attach|import 825 * - used for matching device matching during auto-{online,expand,replace} 826 * - stored in a leaf disk config label (i.e. alongside 'path' NVP) 827 * - these strings are currently not used in kernel (i.e. for vdev_disk_open) 828 * 829 * single device node example: 830 * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1' 831 * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0' 832 * 833 * multipath device node example: 834 * devid: 'dm-uuid-mpath-35000c5006304de3f' 835 * 836 * We also store the enclosure sysfs path for turning on enclosure LEDs 837 * (if applicable): 838 * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' 839 */ 840 void 841 update_vdev_config_dev_strs(nvlist_t *nv) 842 { 843 vdev_dev_strs_t vds; 844 char *env, *type, *path; 845 uint64_t wholedisk = 0; 846 847 /* 848 * For the benefit of legacy ZFS implementations, allow 849 * for opting out of devid strings in the vdev label. 850 * 851 * example use: 852 * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer 853 * 854 * explanation: 855 * Older OpenZFS implementations had issues when attempting to 856 * display pool config VDEV names if a "devid" NVP value is 857 * present in the pool's config. 858 * 859 * For example, a pool that originated on illumos platform would 860 * have a devid value in the config and "zpool status" would fail 861 * when listing the config. 862 * 863 * A pool can be stripped of any "devid" values on import or 864 * prevented from adding them on zpool create|add by setting 865 * ZFS_VDEV_DEVID_OPT_OUT. 866 */ 867 env = getenv("ZFS_VDEV_DEVID_OPT_OUT"); 868 if (env && (strtoul(env, NULL, 0) > 0 || 869 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) { 870 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); 871 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); 872 return; 873 } 874 875 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 || 876 strcmp(type, VDEV_TYPE_DISK) != 0) { 877 return; 878 } 879 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) 880 return; 881 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); 882 883 /* 884 * Update device string values in the config nvlist. 885 */ 886 if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) { 887 (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid); 888 if (vds.vds_devphys[0] != '\0') { 889 (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, 890 vds.vds_devphys); 891 } 892 update_vdev_config_dev_sysfs_path(nv, path); 893 } else { 894 /* Clear out any stale entries. */ 895 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); 896 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); 897 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); 898 } 899 } 900