1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 25 * Copyright 2015 RackTop Systems. 26 * Copyright (c) 2016, Intel Corporation. 27 */ 28 29 /* 30 * Pool import support functions. 31 * 32 * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since 33 * these commands are expected to run in the global zone, we can assume 34 * that the devices are all readable when called. 35 * 36 * To import a pool, we rely on reading the configuration information from the 37 * ZFS label of each device. If we successfully read the label, then we 38 * organize the configuration information in the following hierarchy: 39 * 40 * pool guid -> toplevel vdev guid -> label txg 41 * 42 * Duplicate entries matching this same tuple will be discarded. Once we have 43 * examined every device, we pick the best label txg config for each toplevel 44 * vdev. We then arrange these toplevel vdevs into a complete pool config, and 45 * update any paths that have changed. 
 * Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <libgen.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/dktp/fdisk.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>

#include <thread_pool.h>
#include <libzutil.h>
#include <libnvpair.h>
#include <libzfs.h>

#include "zutil_import.h"

#ifdef HAVE_LIBUDEV
#include <libudev.h>
#include <sched.h>
#endif
#include <blkid/blkid.h>

#define	DEV_BYID_PATH	"/dev/disk/by-id/"

/*
 * Skip devices with well known prefixes:
 * there can be side effects when opening devices which need to be avoided.
 *
 * hpet        - High Precision Event Timer
 * watchdog[N] - Watchdog must be closed in a special way.
 *
 * The first "watchdog" comparison catches the bare name (no trailing
 * digit); the strncmp/isdigit pair catches "watchdog0", "watchdog1", ...
 */
static boolean_t
should_skip_dev(const char *dev)
{
	return ((strcmp(dev, "watchdog") == 0) ||
	    (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) ||
	    (strcmp(dev, "hpet") == 0));
}

/*
 * Flush the block device's buffer cache for the open descriptor 'fd'
 * via the Linux BLKFLSBUF ioctl.  Returns the ioctl's result (0 on
 * success, -1 with errno set on failure).
 */
int
zfs_dev_flush(int fd)
{
	return (ioctl(fd, BLKFLSBUF));
}

/*
 * Thread-pool worker: probe a single candidate device (an rdsk_node_t
 * passed as 'arg') for a ZFS label.
 *
 * On success the parsed label config and label count are stored in
 * rn->rn_config / rn->rn_num_labels (ownership of the nvlist passes to
 * the node).  When rn->rn_labelpaths is set, additional rdsk_node_t
 * entries are speculatively created for the path and devid recorded in
 * the label and inserted into the shared AVL tree (guarded by
 * rn->rn_lock); each new entry is then probed recursively.
 */
void
zpool_open_func(void *arg)
{
	rdsk_node_t *rn = arg;
	libpc_handle_t *hdl = rn->rn_hdl;
	struct stat64 statbuf;
	nvlist_t *config;
	uint64_t vdev_guid = 0;
	int error;
	int num_labels = 0;
	int fd;

	if (should_skip_dev(zfs_basename(rn->rn_name)))
		return;

	/*
	 * Ignore failed stats.  We only want regular files and block devices.
	 * Ignore files that are too small to hold a zpool.
	 */
	if (stat64(rn->rn_name, &statbuf) != 0 ||
	    (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) ||
	    (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE))
		return;

	/*
	 * Preferentially open using O_DIRECT to bypass the block device
	 * cache which may be stale for multipath devices.  An EINVAL errno
	 * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
	 */
	fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
	if ((fd < 0) && (errno == EINVAL))
		fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC);
	if ((fd < 0) && (errno == EACCES))
		hdl->lpc_open_access_error = B_TRUE;
	if (fd < 0)
		return;

	error = zpool_read_label(fd, &config, &num_labels);
	if (error != 0) {
		(void) close(fd);
		return;
	}

	/* A readable device with no intact labels is not importable. */
	if (num_labels == 0) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	/*
	 * Check that the vdev is for the expected guid.  Additional entries
	 * are speculatively added based on the paths stored in the labels.
	 * Entries with valid paths but incorrect guids must be removed.
	 */
	error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	(void) close(fd);

	/* The node now owns the label config. */
	rn->rn_config = config;
	rn->rn_num_labels = num_labels;

	/*
	 * Add additional entries for paths described by this label.
	 */
	if (rn->rn_labelpaths) {
		const char *path = NULL;
		const char *devid = NULL;
		const char *env = NULL;
		rdsk_node_t *slice;
		avl_index_t where;
		int timeout;
		/* NOTE: shadows the outer 'error' declared above. */
		int error;

		if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
			return;

		/*
		 * ZPOOL_IMPORT_UDEV_TIMEOUT_MS overrides the default
		 * devlink-settle timeout; invalid or negative values fall
		 * back to DISK_LABEL_WAIT.
		 */
		env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
		if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
		    timeout < 0) {
			timeout = DISK_LABEL_WAIT;
		}

		/*
		 * Allow devlinks to stabilize so all paths are available.
		 */
		zpool_label_disk_wait(rn->rn_name, timeout);

		if (path != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			slice->rn_name = zutil_strdup(hdl, path);
			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_1;
			/* Prevent unbounded recursion from the new node. */
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				/* Duplicate entry: discard the new node. */
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				/* Probe the newly added path in-line. */
				zpool_open_func(slice);
			}
		}

		if (devid != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			error = asprintf(&slice->rn_name, "%s%s",
			    DEV_BYID_PATH, devid);
			if (error == -1) {
				free(slice);
				return;
			}

			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_2;
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				zpool_open_func(slice);
			}
		}
	}
}

/*
 * Default device directories searched on import, in decreasing order of
 * preference (lower index == more preferred name for a vdev).
 */
static const char * const
zpool_default_import_path[] = {
	"/dev/disk/by-vdev",	/* Custom rules, use first if they exist */
	"/dev/mapper",		/* Use multipath devices before components */
	"/dev/disk/by-partlabel", /* Single unique entry set by user */
	"/dev/disk/by-partuuid", /* Generated partition uuid */
	"/dev/disk/by-label",	/* Custom persistent labels */
	"/dev/disk/by-uuid",	/* Single unique entry and persistent */
	"/dev/disk/by-id",	/* May be multiple entries and persistent */
	"/dev/disk/by-path",	/* Encodes physical location and persistent */
	"/dev"			/* UNSAFE device names will change */
};

/*
 * Return the default import search-path table and store its length
 * in *count.  The returned array is static; callers must not free it.
 */
const char * const *
zpool_default_search_paths(size_t *count)
{
	*count = ARRAY_SIZE(zpool_default_import_path);
	return (zpool_default_import_path);
}

/*
 * Given a full path to a device determine if that device appears in the
 * import search path.  If it does return the first match and store the
 * index in the passed 'order' variable, otherwise return an error.
 *
 * When the ZPOOL_IMPORT_PATH environment variable is set it is treated
 * as a colon-separated directory list and searched instead of the
 * default table; matching is by directory-prefix in both cases.
 * Returns 0 on match, ENOENT otherwise.
 */
static int
zfs_path_order(const char *name, int *order)
{
	const char *env = getenv("ZPOOL_IMPORT_PATH");

	if (env) {
		for (int i = 0; ; ++i) {
			env += strspn(env, ":");
			size_t dirlen = strcspn(env, ":");
			if (dirlen) {
				if (strncmp(name, env, dirlen) == 0) {
					*order = i;
					return (0);
				}

				env += dirlen;
			} else
				break;
		}
	} else {
		for (int i = 0; i < ARRAY_SIZE(zpool_default_import_path);
		    ++i) {
			if (strncmp(name, zpool_default_import_path[i],
			    strlen(zpool_default_import_path[i])) == 0) {
				*order = i;
				return (0);
			}
		}
	}

	return (ENOENT);
}

/*
 * Use libblkid to quickly enumerate all known zfs devices.
 */
/*
 * Build an AVL tree of candidate ZFS devices using the libblkid cache.
 *
 * Probes the blkid cache for devices tagged TYPE=zfs_member and creates
 * one rdsk_node_t per unique device name, ordered by its position in
 * the import search path (or IMPORT_ORDER_DEFAULT when not found there).
 *
 * On success returns 0 and stores the newly allocated tree in
 * *slice_cache (caller owns it and its nodes).  On failure returns a
 * blkid error or EINVAL and leaves *slice_cache NULL.  'lock' guards
 * tree insertion and is recorded in each node for later workers.
 */
int
zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
    avl_tree_t **slice_cache)
{
	rdsk_node_t *slice;
	blkid_cache cache;
	blkid_dev_iterate iter;
	blkid_dev dev;
	avl_index_t where;
	int error;

	*slice_cache = NULL;

	error = blkid_get_cache(&cache, NULL);
	if (error != 0)
		return (error);

	error = blkid_probe_all_new(cache);
	if (error != 0) {
		blkid_put_cache(cache);
		return (error);
	}

	iter = blkid_dev_iterate_begin(cache);
	if (iter == NULL) {
		blkid_put_cache(cache);
		return (EINVAL);
	}

	/* Only const char *s since 2.32 */
	error = blkid_dev_set_search(iter,
	    (char *)"TYPE", (char *)"zfs_member");
	if (error != 0) {
		blkid_dev_iterate_end(iter);
		blkid_put_cache(cache);
		return (error);
	}

	*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
	    offsetof(rdsk_node_t, rn_node));

	while (blkid_dev_next(iter, &dev) == 0) {
		slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
		slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
		slice->rn_vdev_guid = 0;
		slice->rn_lock = lock;
		slice->rn_avl = *slice_cache;
		slice->rn_hdl = hdl;
		/* Expand path/devid entries when this node is probed. */
		slice->rn_labelpaths = B_TRUE;

		/*
		 * Devices found in the import search path are preferred
		 * (offset keeps them ordered after explicit entries).
		 */
		error = zfs_path_order(slice->rn_name, &slice->rn_order);
		if (error == 0)
			slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
		else
			slice->rn_order = IMPORT_ORDER_DEFAULT;

		pthread_mutex_lock(lock);
		if (avl_find(*slice_cache, slice, &where)) {
			/* Duplicate device name: drop the new node. */
			free(slice->rn_name);
			free(slice);
		} else {
			avl_insert(*slice_cache, slice, where);
		}
		pthread_mutex_unlock(lock);
	}

	blkid_dev_iterate_end(iter);
	blkid_put_cache(cache);

	return (0);
}

/*
 * Linux persistent device strings for vdev labels
 *
 * based on libudev for consistency with libudev disk add/remove events
 */
/* Holds the encoded persistent identity strings for one device. */
typedef struct vdev_dev_strs {
	char	vds_devid[128];		/* persistent "what" identifier */
	char	vds_devphys[128];	/* persistent "where" identifier */
} vdev_dev_strs_t;

#ifdef HAVE_LIBUDEV

/*
 * Obtain the persistent device id string (describes what)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 *
 * Writes at most 'buflen' bytes (NUL-terminated via strlcpy/snprintf)
 * into 'bufptr'.  Returns 0 on success, ENODATA when no suitable
 * identifier can be derived from the udev device.
 */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	struct udev_list_entry *entry;
	const char *bus;
	char devbyid[MAXPATHLEN];

	/* The bus based by-id path is preferred */
	bus = udev_device_get_property_value(dev, "ID_BUS");

	if (bus == NULL) {
		const char *dm_uuid;

		/*
		 * For multipath nodes use the persistent uuid based identifier
		 *
		 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
		 */
		dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
		if (dm_uuid != NULL) {
			(void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
			return (0);
		}

		/*
		 * For volumes use the persistent /dev/zvol/dataset identifier
		 */
		entry = udev_device_get_devlinks_list_entry(dev);
		while (entry != NULL) {
			const char *name;

			name = udev_list_entry_get_name(entry);
			if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
				(void) strlcpy(bufptr, name, buflen);
				return (0);
			}
			entry = udev_list_entry_get_next(entry);
		}

		/*
		 * NVME 'by-id' symlinks are similar to bus case
		 */
		struct udev_device *parent;

		parent = udev_device_get_parent_with_subsystem_devtype(dev,
		    "nvme", NULL);
		if (parent != NULL)
			bus = "nvme";	/* continue with bus symlink search */
		else
			return (ENODATA);
	}

	/*
	 * locate the bus specific by-id link
	 * (e.g. "/dev/disk/by-id/scsi-..." when bus == "scsi")
	 */
	(void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		const char *name;

		name = udev_list_entry_get_name(entry);
		if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
			/* Strip the directory, keep "<bus>-..." portion. */
			name += strlen(DEV_BYID_PATH);
			(void) strlcpy(bufptr, name, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	return (ENODATA);
}

/*
 * Obtain the persistent physical location string (describes where)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 *
 * Tries, in order: ID_PATH, ID_VDEV (from /etc/vdev_id.conf), a
 * /dev/zvol devlink, then a /dev/disk/by-uuid devlink.  Returns 0 on
 * success with the string copied into 'bufptr', ENODATA otherwise.
 */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	const char *physpath = NULL;
	struct udev_list_entry *entry;

	/*
	 * Normal disks use ID_PATH for their physical path.
	 */
	physpath = udev_device_get_property_value(dev, "ID_PATH");
	if (physpath != NULL && strlen(physpath) > 0) {
		(void) strlcpy(bufptr, physpath, buflen);
		return (0);
	}

	/*
	 * Device mapper devices are virtual and don't have a physical
	 * path.  For them we use ID_VDEV instead, which is setup via the
	 * /etc/vdev_id.conf file.  ID_VDEV provides a persistent path
	 * to a virtual device.  If you don't have vdev_id.conf setup,
	 * you cannot use multipath autoreplace with device mapper.
	 */
	physpath = udev_device_get_property_value(dev, "ID_VDEV");
	if (physpath != NULL && strlen(physpath) > 0) {
		(void) strlcpy(bufptr, physpath, buflen);
		return (0);
	}

	/*
	 * For ZFS volumes use the persistent /dev/zvol/dataset identifier
	 */
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	/*
	 * For all other devices fallback to using the by-uuid name.
	 */
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	return (ENODATA);
}

/*
 * A disk is considered a multipath whole disk when:
 *	DEVNAME key value has "dm-"
 *	DM_NAME key value has "mpath" prefix
 *	DM_UUID key exists
 *	ID_PART_TABLE_TYPE key does not exist or is not gpt
 */
static boolean_t
udev_mpath_whole_disk(struct udev_device *dev)
{
	const char *devname, *type, *uuid;

	devname = udev_device_get_property_value(dev, "DEVNAME");
	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
	uuid = udev_device_get_property_value(dev, "DM_UUID");

	if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
	    ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
	    (uuid != NULL)) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Returns nonzero when udev has finished initializing 'dev'.  Older
 * libudev lacks udev_device_get_is_initialized(), so fall back to
 * checking for the DEVLINKS property.
 */
static int
udev_device_is_ready(struct udev_device *dev)
{
#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
	return (udev_device_get_is_initialized(dev));
#else
	/* wait for DEVLINKS property to be initialized */
	return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
#endif
}

#else

/* Without libudev no persistent identifiers are available. */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev, (void) bufptr, (void) buflen;
	return (ENODATA);
}

int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev, (void) bufptr, (void) buflen;
	return (ENODATA);
}

#endif /* HAVE_LIBUDEV */

/*
 * Wait up to timeout_ms for udev to set up the device node.
 * The device is
 * considered ready when libudev determines it has been initialized, all of
 * the device links have been verified to exist, and it has been allowed to
 * settle.  At this point the device can be accessed reliably.  Depending on
 * the complexity of the udev rules this process could take several seconds.
 *
 * Returns 0 when the device is ready, ENODEV on timeout, ENXIO if the
 * udev context cannot be created (udev build only), or a stat errno
 * (non-udev build).
 */
int
zpool_label_disk_wait(const char *path, int timeout_ms)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname = NULL;
	int ret = ENODEV;
	int settle_ms = 50;	/* device must look ready this long */
	long sleep_ms = 10;	/* poll interval */
	hrtime_t start, settle;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	start = gethrtime();
	settle = 0;

	do {
		/*
		 * Resolve the path to its kernel sysname once it exists;
		 * until then keep polling within the timeout.
		 */
		if (sysname == NULL) {
			if (realpath(path, nodepath) != NULL) {
				sysname = strrchr(nodepath, '/') + 1;
			} else {
				(void) usleep(sleep_ms * MILLISEC);
				continue;
			}
		}

		dev = udev_device_new_from_subsystem_sysname(udev,
		    "block", sysname);
		if ((dev != NULL) && udev_device_is_ready(dev)) {
			struct udev_list_entry *links, *link = NULL;

			ret = 0;
			links = udev_device_get_devlinks_list_entry(dev);

			/* Every advertised devlink must actually exist. */
			udev_list_entry_foreach(link, links) {
				struct stat64 statbuf;
				const char *name;

				name = udev_list_entry_get_name(link);
				errno = 0;
				if (stat64(name, &statbuf) == 0 && errno == 0)
					continue;

				/* A missing link restarts the settle clock. */
				settle = 0;
				ret = ENODEV;
				break;
			}

			if (ret == 0) {
				if (settle == 0) {
					settle = gethrtime();
				} else if (NSEC2MSEC(gethrtime() - settle) >=
				    settle_ms) {
					udev_device_unref(dev);
					break;
				}
			}
		}

		udev_device_unref(dev);
		(void) usleep(sleep_ms * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	udev_unref(udev);

	return (ret);
#else
	/* Without libudev, poll for the node to exist and settle. */
	int settle_ms = 50;
	long sleep_ms = 10;
	hrtime_t start, settle;
	struct stat64 statbuf;

	start = gethrtime();
	settle = 0;

	do {
		errno = 0;
		if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
			if (settle == 0)
				settle = gethrtime();
			else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
				return (0);
		} else if (errno != ENOENT) {
			/* Any error other than "not yet created" is fatal. */
			return (errno);
		}

		usleep(sleep_ms * MILLISEC);
	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	return (ENODEV);
#endif /* HAVE_LIBUDEV */
}

/*
 * Encode the persistent devices strings
 * used for the vdev disk label
 *
 * Fills 'ds' with the devid (required) and physical path (optional;
 * empty string when unavailable) for 'path'.  Only whole disks and
 * multipath whole disks are encoded.  Returns 0 on success, ENODEV if
 * the device is absent/ineligible or encoding fails, ENXIO if the udev
 * context cannot be created, ENOENT when built without libudev.
 */
static int
encode_device_strings(const char *path, vdev_dev_strs_t *ds,
    boolean_t wholedisk)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname;
	int ret = ENODEV;
	hrtime_t start;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	/* resolve path to a runtime device node instance */
	if (realpath(path, nodepath) == NULL)
		goto no_dev;

	sysname = strrchr(nodepath, '/') + 1;

	/*
	 * Wait up to 3 seconds for udev to set up the device node context
	 */
	start = gethrtime();
	do {
		dev = udev_device_new_from_subsystem_sysname(udev, "block",
		    sysname);
		if (dev == NULL)
			goto no_dev;
		if (udev_device_is_ready(dev))
			break;	/* udev ready */

		udev_device_unref(dev);
		dev = NULL;

		if (NSEC2MSEC(gethrtime() - start) < 10)
			(void) sched_yield();	/* yield/busy wait up to 10ms */
		else
			(void) usleep(10 * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));

	if (dev == NULL)
		goto no_dev;

	/*
	 * Only whole disks require extra device strings
	 */
	if (!wholedisk && !udev_mpath_whole_disk(dev))
		goto no_dev;

	ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
	if (ret != 0)
		goto no_dev_ref;

	/* physical location string (optional) */
	if (zfs_device_get_physical(dev, ds->vds_devphys,
	    sizeof (ds->vds_devphys)) != 0) {
		ds->vds_devphys[0] = '\0'; /* empty string --> not available */
	}

no_dev_ref:
	udev_device_unref(dev);
no_dev:
	udev_unref(udev);

	return (ret);
#else
	(void) path;
	(void) ds;
	(void) wholedisk;
	return (ENOENT);
#endif
}

/*
 * Rescan the enclosure sysfs path for turning on enclosure LEDs and store it
 * in the nvlist * (if applicable).  Like:
 *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
 *
 * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH)
 *
 * When no enclosure path exists any previous value stored under 'key'
 * is removed from the nvlist.
 */
void
update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
    const char *key)
{
	char *upath, *spath;

	/* Add enclosure sysfs path (if disk is in an enclosure). */
	upath = zfs_get_underlying_path(path);
	spath = zfs_get_enclosure_sysfs_path(upath);

	if (spath) {
		(void) nvlist_add_string(nv, key, spath);
	} else {
		(void) nvlist_remove_all(nv, key);
	}

	free(upath);
	free(spath);
}

/*
 * This will get called for each leaf vdev.
 * Returns 1 (stop descending) when the vdev has no path, 0 otherwise.
 */
static int
sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
{
	(void) hdl_data, (void) data;

	const char *path = NULL;
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return (1);

	/* Rescan our enclosure sysfs path for this vdev */
	update_vdev_config_dev_sysfs_path(nv, path,
	    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	return (0);
}

/*
 * Given an nvlist for our pool (with vdev tree), iterate over all the
 * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH.
813 */ 814 void 815 update_vdevs_config_dev_sysfs_path(nvlist_t *config) 816 { 817 nvlist_t *nvroot = NULL; 818 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 819 &nvroot) == 0); 820 for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL); 821 } 822 823 /* 824 * Update a leaf vdev's persistent device strings 825 * 826 * - only applies for a dedicated leaf vdev (aka whole disk) 827 * - updated during pool create|add|attach|import 828 * - used for matching device matching during auto-{online,expand,replace} 829 * - stored in a leaf disk config label (i.e. alongside 'path' NVP) 830 * - these strings are currently not used in kernel (i.e. for vdev_disk_open) 831 * 832 * single device node example: 833 * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1' 834 * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0' 835 * 836 * multipath device node example: 837 * devid: 'dm-uuid-mpath-35000c5006304de3f' 838 * 839 * We also store the enclosure sysfs path for turning on enclosure LEDs 840 * (if applicable): 841 * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' 842 */ 843 void 844 update_vdev_config_dev_strs(nvlist_t *nv) 845 { 846 vdev_dev_strs_t vds; 847 const char *env, *type, *path; 848 uint64_t wholedisk = 0; 849 850 /* 851 * For the benefit of legacy ZFS implementations, allow 852 * for opting out of devid strings in the vdev label. 853 * 854 * example use: 855 * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer 856 * 857 * explanation: 858 * Older OpenZFS implementations had issues when attempting to 859 * display pool config VDEV names if a "devid" NVP value is 860 * present in the pool's config. 861 * 862 * For example, a pool that originated on illumos platform would 863 * have a devid value in the config and "zpool status" would fail 864 * when listing the config. 865 * 866 * A pool can be stripped of any "devid" values on import or 867 * prevented from adding them on zpool create|add by setting 868 * ZFS_VDEV_DEVID_OPT_OUT. 
869 */ 870 env = getenv("ZFS_VDEV_DEVID_OPT_OUT"); 871 if (env && (strtoul(env, NULL, 0) > 0 || 872 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) { 873 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); 874 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); 875 return; 876 } 877 878 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 || 879 strcmp(type, VDEV_TYPE_DISK) != 0) { 880 return; 881 } 882 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) 883 return; 884 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); 885 886 /* 887 * Update device string values in the config nvlist. 888 */ 889 if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) { 890 (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid); 891 if (vds.vds_devphys[0] != '\0') { 892 (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, 893 vds.vds_devphys); 894 } 895 update_vdev_config_dev_sysfs_path(nv, path, 896 ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); 897 } else { 898 /* Clear out any stale entries. */ 899 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); 900 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); 901 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); 902 } 903 } 904