/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2020 Joyent, Inc.
 * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
 * Copyright 2024 Oxide Computer Company
 */

/*
 * This file drives topo node enumeration of NVMe controllers. A single "nvme"
 * node is enumerated for each NVMe controller.  Child "disk" nodes are then
 * enumerated for each active or attached NVMe namespace.
 *
 * nvme nodes are expected to be enumerated under either a "bay" node (for U.2
 * devices) or a "slot" node (for M.2 devices) or a "pciexfn" node (for AIC
 * devices).
 *
 * Enumeration of NVMe controllers on PCIe add-in cards is automatically driven
 * by the pcibus topo module.
29 * 30 * In order to allow for associating a given NVMe controller with a physical 31 * location, enumeration of U.2 and M.2 devices should be driven by a 32 * platform-specific topo map which statically sets the following two 33 * properties on the parent "bay" or "slot" node: 34 * 35 * propgroup property description 36 * --------- -------- ------------ 37 * binding driver "nvme" 38 * binding parent-device devpath of parent PCIe device 39 * 40 * for example: 41 * 42 * <propgroup name="binding" version="1" name-stability="Private" 43 * data-stability="Private"> 44 * <propval name="driver" type="string" value="nvme"/> 45 * <propval name="parent-device" type="string" 46 * value="/pci@0,0/pci8086,6f09@3,1"/> 47 * </propgroup> 48 * <dependents grouping="children"> 49 * <range name="nvme" min="0" max="0"> 50 * <enum-method name="disk" version="1"/> 51 * </range> 52 * </dependents> 53 */ 54 #include <stdlib.h> 55 #include <sys/types.h> 56 #include <sys/stat.h> 57 #include <fcntl.h> 58 #include <unistd.h> 59 #include <string.h> 60 #include <strings.h> 61 #include <stdbool.h> 62 63 #include <sys/fm/protocol.h> 64 #include <fm/topo_hc.h> 65 #include <fm/topo_mod.h> 66 #include <topo_ufm.h> 67 68 #include <sys/dkio.h> 69 #include <sys/scsi/generic/inquiry.h> 70 71 #include <libnvme.h> 72 #include "disk.h" 73 #include "disk_drivers.h" 74 75 typedef struct nvme_enum_info { 76 topo_mod_t *nei_mod; 77 di_node_t nei_dinode; 78 nvme_t *nei_libnvme; 79 nvme_ctrl_t *nei_ctrl; 80 nvme_ctrl_info_t *nei_ctrl_info; 81 const nvme_version_t *nei_vers; 82 tnode_t *nei_parent; 83 tnode_t *nei_nvme; 84 nvlist_t *nei_nvme_fmri; 85 int nei_fd; 86 } nvme_enum_info_t; 87 88 typedef struct devlink_arg { 89 topo_mod_t *dla_mod; 90 char *dla_logical_disk; 91 uint_t dla_strsz; 92 } devlink_arg_t; 93 94 static int 95 devlink_cb(di_devlink_t dl, void *arg) 96 { 97 devlink_arg_t *dlarg = (devlink_arg_t *)arg; 98 topo_mod_t *mod = dlarg->dla_mod; 99 const char *devpath; 100 char *slice, *ctds; 101 102 
if ((devpath = di_devlink_path(dl)) == NULL || 103 (dlarg->dla_logical_disk = topo_mod_strdup(mod, devpath)) == 104 NULL) { 105 return (DI_WALK_TERMINATE); 106 } 107 108 /* 109 * We need to keep track of the original string size before we 110 * truncate it with a NUL, so that we can free the right number of 111 * bytes when we're done, otherwise libumem will complain. 112 */ 113 dlarg->dla_strsz = strlen(dlarg->dla_logical_disk) + 1; 114 115 /* trim the slice off the public name */ 116 if (((ctds = strrchr(dlarg->dla_logical_disk, '/')) != NULL) && 117 ((slice = strchr(ctds, 's')) != NULL)) 118 *slice = '\0'; 119 120 return (DI_WALK_TERMINATE); 121 } 122 123 static char * 124 get_logical_disk(topo_mod_t *mod, const char *devpath, uint_t *bufsz) 125 { 126 di_devlink_handle_t devhdl; 127 devlink_arg_t dlarg = { 0 }; 128 char *minorpath = NULL; 129 130 if (asprintf(&minorpath, "%s:a", devpath) < 0) { 131 return (NULL); 132 } 133 134 if ((devhdl = di_devlink_init(NULL, 0)) == DI_NODE_NIL) { 135 topo_mod_dprintf(mod, "%s: di_devlink_init failed", __func__); 136 free(minorpath); 137 return (NULL); 138 } 139 140 dlarg.dla_mod = mod; 141 142 (void) di_devlink_walk(devhdl, "^dsk/", minorpath, DI_PRIMARY_LINK, 143 &dlarg, devlink_cb); 144 145 (void) di_devlink_fini(&devhdl); 146 free(minorpath); 147 148 *bufsz = dlarg.dla_strsz; 149 return (dlarg.dla_logical_disk); 150 } 151 152 static bool 153 disk_nvme_make_ns_serial(topo_mod_t *mod, nvme_ns_info_t *ns_info, char *buf, 154 size_t buflen) 155 { 156 uint8_t nguid[16], eui64[8]; 157 int ret; 158 159 if (nvme_ns_info_nguid(ns_info, nguid)) { 160 ret = snprintf(buf, buflen, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X" 161 "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X", 162 nguid[0], nguid[1], nguid[2], nguid[3], nguid[4], 163 nguid[5], nguid[6], nguid[7], nguid[8], nguid[9], 164 nguid[10], nguid[11], nguid[12], nguid[13], nguid[14], 165 nguid[15]); 166 } else if (nvme_ns_info_eui64(ns_info, eui64)) { 167 ret = snprintf(buf, buflen, 
168 "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X", 169 eui64[0], eui64[1], eui64[2], eui64[3], eui64[4], 170 eui64[5], eui64[6], eui64[7]); 171 } else { 172 ret = snprintf(buf, buflen, "%u", nvme_ns_info_nsid(ns_info)); 173 } 174 175 if ((size_t)ret >= buflen) { 176 topo_mod_dprintf(mod, "overflowed serial number for nsid %u: " 177 "needed %zu bytes, got %d", nvme_ns_info_nsid(ns_info), 178 buflen, ret); 179 return (false); 180 } 181 182 return (true); 183 } 184 185 /* 186 * Create the common I/O property group properties that are shared between 187 * controllers and namespaces. We assume the property group was already created. 188 */ 189 static bool 190 disk_nvme_common_io(topo_mod_t *mod, tnode_t *tn, di_node_t di) 191 { 192 int err; 193 int inst = di_instance(di); 194 const char *drv = di_driver_name(di); 195 char *path; 196 const char *ppaths[1]; 197 198 if (inst != -1 && topo_prop_set_uint32(tn, TOPO_PGROUP_IO, 199 TOPO_IO_INSTANCE, TOPO_PROP_IMMUTABLE, (uint32_t)inst, &err) != 0) { 200 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 201 "%s", TOPO_PGROUP_IO, TOPO_IO_INSTANCE, topo_node_name(tn), 202 topo_node_instance(tn), topo_strerror(err)); 203 return (false); 204 } 205 206 if (drv != NULL && topo_prop_set_string(tn, TOPO_PGROUP_IO, 207 TOPO_IO_DRIVER, TOPO_PROP_IMMUTABLE, drv, &err) != 0) { 208 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 209 "%s", TOPO_PGROUP_IO, TOPO_IO_DRIVER, topo_node_name(tn), 210 topo_node_instance(tn), topo_strerror(err)); 211 return (false); 212 } 213 214 if (drv != NULL) { 215 nvlist_t *fmri = topo_mod_modfmri(mod, FM_MOD_SCHEME_VERSION, 216 drv); 217 if (mod != NULL && topo_prop_set_fmri(tn, TOPO_PGROUP_IO, 218 TOPO_IO_MODULE, TOPO_PROP_IMMUTABLE, fmri, &err) != 0) { 219 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" 220 PRIu64 "]: %s", TOPO_PGROUP_IO, TOPO_IO_MODULE, 221 topo_node_name(tn), topo_node_instance(tn), 222 topo_strerror(err)); 223 nvlist_free(fmri); 224 return (false); 225 } 
226 nvlist_free(fmri); 227 } 228 229 path = di_devfs_path(di); 230 ppaths[0] = path; 231 if (path != NULL && topo_prop_set_string(tn, TOPO_PGROUP_IO, 232 TOPO_IO_DEV_PATH, TOPO_PROP_IMMUTABLE, path, &err) != 0) { 233 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 234 "%s", TOPO_PGROUP_IO, TOPO_IO_DRIVER, topo_node_name(tn), 235 topo_node_instance(tn), topo_strerror(err)); 236 di_devfs_path_free(path); 237 return (false); 238 } 239 240 if (path != NULL && topo_prop_set_string_array(tn, TOPO_PGROUP_IO, 241 TOPO_IO_PHYS_PATH, TOPO_PROP_IMMUTABLE, ppaths, 1, &err) != 0) { 242 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 243 "%s", TOPO_PGROUP_IO, TOPO_IO_PHYS_PATH, topo_node_name(tn), 244 topo_node_instance(tn), topo_strerror(err)); 245 di_devfs_path_free(path); 246 return (false); 247 } 248 di_devfs_path_free(path); 249 250 return (true); 251 } 252 253 /* 254 * Add the various storage and I/O property group items that are appropriate 255 * given that we have a devinfo node. The storage property group has already 256 * been created, but the I/O property group has not. 257 */ 258 static void 259 disk_nvme_make_ns_di_props(topo_mod_t *mod, tnode_t *tn, di_node_t di) 260 { 261 int err; 262 char *devid, *mfg, *model, *rev, *serial, *log, *path; 263 uint_t buflen; 264 265 if (di_prop_lookup_strings(DDI_DEV_T_ANY, di, DEVID_PROP_NAME, 266 &devid) != 1 || 267 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_VENDOR_ID, 268 &mfg) != 1 || 269 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_PRODUCT_ID, 270 &model) != 1 || 271 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_REVISION_ID, 272 &rev) != 1 || 273 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_SERIAL_NO, 274 &serial) != 1) { 275 topo_mod_dprintf(mod, "failed to get devinfo props for %s[%" 276 PRIu64 "]", topo_node_name(tn), topo_node_instance(tn)); 277 return; 278 } 279 280 /* 281 * Set the basic storage manufacturer information. 
Yes, this is 282 * information really about the NVMe controller and not the namespace. 283 * That's how the storage property group basically works here. 284 */ 285 if (topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 286 TOPO_STORAGE_MANUFACTURER, TOPO_PROP_IMMUTABLE, mfg, &err) != 0 || 287 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 288 TOPO_STORAGE_SERIAL_NUM, TOPO_PROP_IMMUTABLE, serial, &err) != 0 || 289 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 290 TOPO_STORAGE_FIRMWARE_REV, TOPO_PROP_IMMUTABLE, rev, &err) != 0 || 291 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 292 TOPO_STORAGE_MODEL, TOPO_PROP_IMMUTABLE, model, &err) != 0) { 293 topo_mod_dprintf(mod, "failed to set storage properties on " 294 "%s[%" PRIu64 "]: %s", topo_node_name(tn), 295 topo_node_instance(tn), topo_strerror(err)); 296 return; 297 } 298 299 if (topo_pgroup_create(tn, &io_pgroup, &err) != 0) { 300 topo_mod_dprintf(mod, "failed to create I/O property " 301 "group on %s[%" PRIu64 "]: %s", topo_node_name(tn), 302 topo_node_instance(tn), topo_strerror(err)); 303 } 304 305 if (!disk_nvme_common_io(mod, tn, di)) { 306 return; 307 } 308 309 /* 310 * The last property that we'd like to attempt to create for a namespace 311 * is a mapping back to its corresponding logical disk entry in /dev. 312 * The logical disk will be everything past the trailing /, i.e. a 313 * cXtXdX value. 
314 */ 315 path = di_devfs_path(di); 316 if (path == NULL) { 317 return; 318 } 319 log = get_logical_disk(mod, path, &buflen); 320 di_devfs_path_free(path); 321 if (log == NULL) { 322 return; 323 } 324 path = strrchr(log, '/'); 325 if (path != NULL && path[1] != '\0' && 326 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 327 TOPO_STORAGE_LOGICAL_DISK_NAME, TOPO_PROP_IMMUTABLE, path + 1, 328 &err) != 0) { 329 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" 330 PRIu64 "]: %s", TOPO_PGROUP_STORAGE, 331 TOPO_STORAGE_LOGICAL_DISK_NAME, topo_node_name(tn), 332 topo_node_instance(tn), topo_strerror(err)); 333 } 334 topo_mod_free(mod, log, buflen); 335 } 336 337 static void 338 disk_nvme_make_ns(nvme_enum_info_t *nei, nvme_ns_info_t *ns_info) 339 { 340 topo_mod_t *mod = nei->nei_mod; 341 nvlist_t *auth = NULL, *fmri = NULL; 342 const uint32_t nsid = nvme_ns_info_nsid(ns_info); 343 const topo_instance_t inst = nsid - 1; 344 char serial[64], capstr[64]; 345 const nvme_nvm_lba_fmt_t *fmt; 346 const char *bd_addr; 347 uint64_t cap, blksz, capblks; 348 tnode_t *tn; 349 int err; 350 351 auth = topo_mod_auth(mod, nei->nei_nvme); 352 if (auth == NULL) { 353 topo_mod_dprintf(mod, "failed to get auth for nsid %u from " 354 "parent %s[%" PRIu64 "]: %s", nsid, 355 topo_node_name(nei->nei_nvme), 356 topo_node_instance(nei->nei_nvme), topo_mod_errmsg(mod)); 357 goto done; 358 } 359 360 /* 361 * We want to construct the FMRI for the namespace. The namespace is a 362 * little awkward in terms of things like the model, revision, and 363 * serial. While blkdev sets up standard inquiry properties to map these 364 * to the parent device which makes sense in the context of trying to 365 * use this as a normal block device, it's not really appropriate here. 366 * The namespace is not the NVMe controller. We construct the namespace 367 * serial number from the preferential ordering of information that 368 * we're given of the NGUID, EUI64, and then fall back to the namespace 369 * number. 
370 */ 371 if (!disk_nvme_make_ns_serial(mod, ns_info, serial, sizeof (serial))) { 372 goto done; 373 } 374 fmri = topo_mod_hcfmri(mod, nei->nei_nvme, FM_HC_SCHEME_VERSION, 375 DISK, inst, NULL, auth, NULL, NULL, serial); 376 if (fmri == NULL) { 377 topo_mod_dprintf(mod, "failed to make fmri for %s[%" PRIu64 378 "] on nsid %u: %s", DISK, inst, nsid, topo_mod_errmsg(mod)); 379 goto done; 380 } 381 382 tn = topo_node_bind(mod, nei->nei_nvme, DISK, inst, fmri); 383 if (tn == NULL) { 384 topo_mod_dprintf(mod, "failed to bind fmri for %s[%" PRIu64 385 "] on nsid %u: %s", DISK, inst, nsid, topo_mod_errmsg(mod)); 386 goto done; 387 } 388 389 /* 390 * Always inherit our parent's FRU. The namespace is just a part of the 391 * device in reality. 392 */ 393 if (topo_node_fru_set(tn, NULL, 0, &err) != 0) { 394 topo_mod_dprintf(mod, "failed to set FRU for %s[%" PRIu64 395 "] on nsid %u: %s", DISK, inst, nsid, topo_strerror(err)); 396 goto done; 397 398 } 399 400 /* 401 * Our namespace may or may not be attached. From the namespace we will 402 * always get the capacity and block information. The rest of it will 403 * end up being filled in if we find a devinfo node. 
404 */ 405 if (topo_pgroup_create(tn, &storage_pgroup, &err) != 0) { 406 topo_mod_dprintf(mod, "failed to create storage property " 407 "group on %s[%" PRIu64 "]: %s", DISK, inst, 408 topo_strerror(err)); 409 } 410 411 if (!nvme_ns_info_curformat(ns_info, &fmt)) { 412 topo_mod_dprintf(mod, "failed to get current namespace " 413 "format: %s", nvme_ns_info_errmsg(ns_info)); 414 goto done; 415 } 416 417 blksz = nvme_nvm_lba_fmt_data_size(fmt); 418 if (topo_prop_set_uint64(tn, TOPO_PGROUP_STORAGE, 419 TOPO_STORAGE_LOG_BLOCK_SIZE, TOPO_PROP_IMMUTABLE, blksz, &err) != 420 0) { 421 topo_mod_dprintf(mod, "failed to create property %s:%s on %s[%" 422 PRIu64 "]: %s", TOPO_PGROUP_STORAGE, 423 TOPO_STORAGE_LOG_BLOCK_SIZE, DISK, inst, 424 topo_strerror(err)); 425 goto done; 426 } 427 428 if (!nvme_ns_info_cap(ns_info, &capblks)) { 429 topo_mod_dprintf(mod, "failed to get namespace capacity: %s", 430 nvme_ns_info_errmsg(ns_info)); 431 goto done; 432 } 433 434 cap = blksz * capblks; 435 if (snprintf(capstr, sizeof (capstr), "%" PRIu64, cap) >= 436 sizeof (capstr)) { 437 topo_mod_dprintf(mod, "overflowed capacity calculation on " 438 "nsid %u", nsid); 439 goto done; 440 } 441 442 /* 443 * Finally attempt to find a child node that has a matching name and go 444 * from there. Sorry, this does result in node creation being O(n^2), 445 * but at least n is usually small today. Note, we may not have a blkdev 446 * address because the disk may not be attached. 
447 */ 448 if (!nvme_ns_info_bd_addr(ns_info, &bd_addr)) { 449 if (nvme_ns_info_err(ns_info) != NVME_INFO_ERR_NS_NO_BLKDEV) { 450 topo_mod_dprintf(mod, "failed to get namespace blkdev " 451 "address: %s", nvme_ns_info_errmsg(ns_info)); 452 } 453 goto done; 454 } 455 456 for (di_node_t di = di_child_node(nei->nei_dinode); di != DI_NODE_NIL; 457 di = di_sibling_node(di)) { 458 const char *addr = di_bus_addr(di); 459 if (addr != NULL && strcmp(addr, bd_addr) == 0) { 460 disk_nvme_make_ns_di_props(mod, tn, di); 461 } 462 } 463 464 done: 465 nvlist_free(auth); 466 nvlist_free(fmri); 467 } 468 469 /* 470 * Attempt to make a ufm node, but swallow the error so we can try to get as 471 * much of the disk information as possible. 472 */ 473 static void 474 disk_nvme_make_ufm(topo_mod_t *mod, nvme_enum_info_t *nei) 475 { 476 topo_ufm_devinfo_t tud; 477 char *path = di_devfs_path(nei->nei_dinode); 478 if (path == NULL) { 479 return; 480 } 481 482 tud.tud_method = TOPO_UFM_M_DEVINFO; 483 tud.tud_path = path; 484 if (topo_mod_load(mod, TOPO_MOD_UFM, TOPO_VERSION) == NULL) { 485 topo_mod_dprintf(mod, "disk enum could not load ufm module"); 486 di_devfs_path_free(path); 487 return; 488 } 489 490 (void) topo_mod_enumerate(mod, nei->nei_nvme, TOPO_MOD_UFM, UFM, 0, 0, 491 &tud); 492 di_devfs_path_free(path); 493 } 494 495 static const topo_pgroup_info_t nvme_pgroup = { 496 TOPO_PGROUP_NVME, 497 TOPO_STABILITY_PRIVATE, 498 TOPO_STABILITY_PRIVATE, 499 1 500 }; 501 502 static int 503 make_nvme_node(nvme_enum_info_t *nvme_info) 504 { 505 topo_mod_t *mod = nvme_info->nei_mod; 506 nvme_ctrl_info_t *info = nvme_info->nei_ctrl_info; 507 nvme_ns_iter_t *iter = NULL; 508 nvme_iter_t nret; 509 const nvme_ns_disc_t *disc; 510 nvlist_t *auth = NULL, *fmri = NULL, *fru; 511 tnode_t *nvme; 512 char *model = NULL, *serial = NULL, *vers = NULL; 513 char *pname = topo_node_name(nvme_info->nei_parent); 514 char *label = NULL; 515 topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent); 516 
int err = 0, ret = -1; 517 518 /* 519 * Pass the model and serial strings through a function that sanitizes 520 * them of any characters that can't be used in an FMRI string. Note, we 521 * do not use the firmware revision here because that's not really a 522 * device property that should be part of the FMRI (it can be changed at 523 * runtime). 524 */ 525 model = topo_mod_clean_str(mod, nvme_ctrl_info_model(info)); 526 serial = topo_mod_clean_str(mod, nvme_ctrl_info_serial(info)); 527 528 auth = topo_mod_auth(mod, nvme_info->nei_parent); 529 fmri = topo_mod_hcfmri(mod, nvme_info->nei_parent, FM_HC_SCHEME_VERSION, 530 NVME, 0, NULL, auth, model, NULL, serial); 531 532 if (fmri == NULL) { 533 /* errno set */ 534 topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%" PRIu64 535 "/%s=0", __func__, pname, pinst, NVME); 536 goto error; 537 } 538 539 /* 540 * If our parent is a pciexfn node, then we need to create a nvme range 541 * underneath it to hold the nvme hierarchy. For other cases, where 542 * enumeration is being driven by a topo map file, this range will have 543 * already been statically defined in the XML. 544 */ 545 if (strcmp(pname, PCIEX_FUNCTION) == 0) { 546 if (topo_node_range_create(mod, nvme_info->nei_parent, NVME, 0, 547 0) < 0) { 548 /* errno set */ 549 topo_mod_dprintf(mod, "%s: error creating %s range", 550 __func__, NVME); 551 goto error; 552 } 553 } 554 555 /* 556 * Create a new topo node to represent the NVMe controller and bind it 557 * to the parent node. 558 */ 559 if ((nvme = topo_node_bind(mod, nvme_info->nei_parent, NVME, 0, 560 fmri)) == NULL) { 561 /* errno set */ 562 topo_mod_dprintf(mod, "%s: bind failed for %s=%" PRIu64 563 "/%s=0", __func__, pname, pinst, NVME); 564 goto error; 565 } 566 nvme_info->nei_nvme = nvme; 567 nvme_info->nei_nvme_fmri = fmri; 568 569 /* 570 * If our parent node is a "pciexfn" node then this is a NVMe device on 571 * a PCIe AIC, so we inherit our parent's FRU. Otherwise, we set the 572 * FRU to ourself. 
573 */ 574 if (strcmp(topo_node_name(nvme_info->nei_parent), PCIEX_FUNCTION) == 0) 575 fru = NULL; 576 else 577 fru = fmri; 578 579 if (topo_node_fru_set(nvme, fru, 0, &err) != 0) { 580 topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__, 581 topo_strerror(err)); 582 (void) topo_mod_seterrno(mod, err); 583 goto error; 584 } 585 586 /* 587 * Clone the label from our parent node. We can't inherit the property 588 * because the label prop is mutable on bay nodes and only immutable 589 * properties can be inherited. 590 */ 591 if ((topo_node_label(nvme_info->nei_parent, &label, &err) != 0 && 592 err != ETOPO_PROP_NOENT) || 593 topo_node_label_set(nvme, label, &err) != 0) { 594 topo_mod_dprintf(mod, "%s: failed to set label: %s", 595 __func__, topo_strerror(err)); 596 (void) topo_mod_seterrno(mod, err); 597 goto error; 598 } 599 600 /* 601 * Ensure that we have a UFM property set based on our devinfo path. 602 * This is a little repetitive if our parent actually did so as well, 603 * but given that the majority of such nodes are under bays and slots 604 * right now, it's a worthwhile tradeoff. 
605 */ 606 disk_nvme_make_ufm(mod, nvme_info); 607 608 if (topo_pgroup_create(nvme, &nvme_pgroup, &err) != 0) { 609 topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s", 610 __func__, TOPO_PGROUP_NVME, topo_strerror(err)); 611 (void) topo_mod_seterrno(mod, err); 612 goto error; 613 } 614 615 if (asprintf(&vers, "%u.%u", nvme_info->nei_vers->v_major, 616 nvme_info->nei_vers->v_minor) < 0) { 617 topo_mod_dprintf(mod, "%s: failed to alloc string", __func__); 618 (void) topo_mod_seterrno(mod, EMOD_NOMEM); 619 goto error; 620 } 621 if (topo_prop_set_string(nvme, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER, 622 TOPO_PROP_IMMUTABLE, vers, &err) != 0) { 623 topo_mod_dprintf(mod, "%s: failed to set %s/%s property", 624 __func__, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER); 625 (void) topo_mod_seterrno(mod, err); 626 goto error; 627 } 628 629 if (topo_pgroup_create(nvme, &io_pgroup, &err) != 0) { 630 topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s", 631 __func__, TOPO_PGROUP_IO, topo_strerror(err)); 632 (void) topo_mod_seterrno(mod, err); 633 goto error; 634 } 635 636 if (!disk_nvme_common_io(mod, nvme, nvme_info->nei_dinode)) { 637 goto error; 638 } 639 640 /* 641 * Create a child disk node for each namespace. 642 */ 643 if (topo_node_range_create(mod, nvme, DISK, 0, 644 nvme_ctrl_info_nns(info) - 1) < 0) { 645 /* errno set */ 646 topo_mod_dprintf(mod, "%s: error creating %s range", __func__, 647 DISK); 648 goto error; 649 } 650 651 /* 652 * Iterate over each namespace to see if it's a candidate for inclusion. 653 * Namespaces start at index 1 and not every namespace will be included. 654 * We map things such that a disk instance is always namespace - 1 to 655 * fit into the above mapping. 
656 */ 657 if (!nvme_ns_discover_init(nvme_info->nei_ctrl, 658 NVME_NS_DISC_F_NOT_IGNORED, &iter)) { 659 topo_mod_dprintf(mod, "failed to initialize namespace " 660 "discovery: %s", nvme_errmsg(nvme_info->nei_libnvme)); 661 ret = topo_mod_seterrno(mod, EMOD_UNKNOWN); 662 goto error; 663 } 664 665 for (nret = nvme_ns_discover_step(iter, &disc); nret == NVME_ITER_VALID; 666 nret = nvme_ns_discover_step(iter, &disc)) { 667 nvme_ns_info_t *ns_info; 668 uint32_t nsid = nvme_ns_disc_nsid(disc); 669 670 if (!nvme_ctrl_ns_info_snap(nvme_info->nei_ctrl, nsid, 671 &ns_info)) { 672 topo_mod_dprintf(mod, "failed to get namespace " 673 "information for ns %u: %s", nsid, 674 nvme_errmsg(nvme_info->nei_libnvme)); 675 ret = topo_mod_seterrno(mod, EMOD_UNKNOWN); 676 goto error; 677 } 678 679 disk_nvme_make_ns(nvme_info, ns_info); 680 nvme_ns_info_free(ns_info); 681 } 682 683 if (nret == NVME_ITER_ERROR) { 684 topo_mod_dprintf(mod, "namespace discovery failed: %s", 685 nvme_errmsg(nvme_info->nei_libnvme)); 686 ret = topo_mod_seterrno(mod, EMOD_UNKNOWN); 687 } 688 ret = 0; 689 690 error: 691 nvme_ns_discover_fini(iter); 692 free(vers); 693 nvlist_free(auth); 694 nvlist_free(fmri); 695 topo_mod_strfree(mod, model); 696 topo_mod_strfree(mod, serial); 697 topo_mod_strfree(mod, label); 698 return (ret); 699 } 700 701 /* 702 * This function gathers identity information from the NVMe controller and 703 * stores it in a struct. This struct is passed to make_nvme_node(), which 704 * does the actual topo node creation. 
705 */ 706 static int 707 discover_nvme_ctl(topo_mod_t *mod, tnode_t *pnode, di_node_t dinode) 708 { 709 topo_disk_t *disk = topo_mod_getspecific(mod); 710 nvme_enum_info_t nvme_info = { 0 }; 711 int ret; 712 713 nvme_info.nei_mod = mod; 714 nvme_info.nei_dinode = dinode; 715 nvme_info.nei_parent = pnode; 716 nvme_info.nei_libnvme = disk->td_nvme; 717 718 if (!nvme_ctrl_init(disk->td_nvme, dinode, &nvme_info.nei_ctrl)) { 719 topo_mod_dprintf(mod, "failed to initialize nvme_ctrl_t: %s", 720 nvme_errmsg(disk->td_nvme)); 721 return (topo_mod_seterrno(mod, EMOD_UNKNOWN)); 722 } 723 724 if (!nvme_ctrl_info_snap(nvme_info.nei_ctrl, 725 &nvme_info.nei_ctrl_info)) { 726 topo_mod_dprintf(mod, "failed to initialize nvme_ctrl_t: %s", 727 nvme_errmsg(disk->td_nvme)); 728 ret = topo_mod_seterrno(mod, EMOD_UNKNOWN); 729 goto error; 730 } 731 732 nvme_info.nei_vers = nvme_ctrl_info_version(nvme_info.nei_ctrl_info); 733 734 if ((ret = make_nvme_node(&nvme_info)) != 0) { 735 goto error; 736 } 737 738 error: 739 if (nvme_info.nei_ctrl_info != NULL) 740 nvme_ctrl_info_free(nvme_info.nei_ctrl_info); 741 if (nvme_info.nei_ctrl != NULL) 742 nvme_ctrl_fini(nvme_info.nei_ctrl); 743 return (ret); 744 } 745 746 int 747 disk_nvme_enum_disk(topo_mod_t *mod, tnode_t *pnode) 748 { 749 char *parent = NULL; 750 int err; 751 di_node_t devtree; 752 di_node_t dnode; 753 int ret = -1; 754 755 /* 756 * Lookup a property containing the devfs path of the parent PCIe 757 * device of the NVMe device we're attempting to enumerate. This 758 * property is hard-coded in per-platform topo XML maps that are 759 * delivered with the OS. This hard-coded path allows topo to map a 760 * given NVMe controller to a physical location (bay or slot) on the 761 * platform, when generating the topo snapshot. 
762 */ 763 if (topo_prop_get_string(pnode, TOPO_PGROUP_BINDING, 764 TOPO_BINDING_PARENT_DEV, &parent, &err) != 0) { 765 topo_mod_dprintf(mod, "parent node was missing nvme binding " 766 "properties\n"); 767 (void) topo_mod_seterrno(mod, err); 768 goto out; 769 } 770 if ((devtree = topo_mod_devinfo(mod)) == DI_NODE_NIL) { 771 topo_mod_dprintf(mod, "failed to get devinfo snapshot"); 772 (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 773 goto out; 774 } 775 776 /* 777 * Walk the devinfo tree looking NVMe devices. For each NVMe device, 778 * check if the devfs path of the parent matches the one specified in 779 * TOPO_BINDING_PARENT_DEV. 780 */ 781 dnode = di_drv_first_node(NVME_DRV, devtree); 782 while (dnode != DI_NODE_NIL) { 783 char *path; 784 785 if ((path = di_devfs_path(di_parent_node(dnode))) == NULL) { 786 topo_mod_dprintf(mod, "failed to get dev path"); 787 (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 788 goto out; 789 } 790 if (strcmp(parent, path) == 0) { 791 ret = discover_nvme_ctl(mod, pnode, dnode); 792 di_devfs_path_free(path); 793 goto out; 794 } 795 di_devfs_path_free(path); 796 dnode = di_drv_next_node(dnode); 797 } 798 ret = 0; 799 800 out: 801 topo_mod_strfree(mod, parent); 802 return (ret); 803 } 804