1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2020 Joyent, Inc.
14  * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
15  */
16 
17 /*
18  * This file drives topo node enumeration of NVMe controllers.  A single "nvme"
19  * node is enumerated for each NVMe controller.   Child "disk" nodes are then
20  * enumerated for each configured NVMe namespace.
21  *
 * nvme nodes are expected to be enumerated under one of the following: a
 * "bay" node (for U.2 devices), a "slot" node (for M.2 devices), or a
 * "pciexfn" node (for AIC devices).
25  *
26  * Enumeration of NVMe controllers on PCIe add-in cards is automatically driven
27  * by the pcibus topo module.
28  *
29  * In order to allow for associating a given NVMe controller with a physical
30  * location, enumeration of U.2 and M.2 devices should be driven by a
31  * platform-specific topo map which statically sets the following two
32  * properties on the parent "bay" or "slot" node:
33  *
34  * propgroup        property        description
35  * ---------        --------        ------------
36  * binding          driver          "nvme"
37  * binding          parent-device   devpath of parent PCIe device
38  *
39  * for example:
40  *
41  * <propgroup name="binding" version="1" name-stability="Private"
42  *   data-stability="Private">
43  *     <propval name="driver" type="string" value="nvme"/>
44  *     <propval name="parent-device" type="string"
45  *       value="/pci@0,0/pci8086,6f09@3,1"/>
46  * </propgroup>
47  * <dependents grouping="children">
48  *     <range name="nvme" min="0" max="0">
49  *         <enum-method name="disk" version="1"/>
50  *     </range>
51  * </dependents>
52  */
53 #include <stdlib.h>
54 #include <sys/types.h>
55 #include <sys/stat.h>
56 #include <fcntl.h>
57 #include <unistd.h>
58 #include <string.h>
59 #include <strings.h>
60 
61 #include <sys/fm/protocol.h>
62 #include <fm/topo_hc.h>
63 #include <fm/topo_mod.h>
64 
65 #include <sys/dkio.h>
66 #include <sys/scsi/generic/inquiry.h>
67 
68 #include <sys/nvme.h>
69 #include "disk.h"
70 #include "disk_drivers.h"
71 
/*
 * Per-controller enumeration state.  It is filled in by discover_nvme_ctl()
 * and handed to make_nvme_node(), which in turn passes it on to
 * make_disk_node() for every namespace.
 */
typedef struct nvme_enum_info {
	topo_mod_t		*nei_mod;	/* topo module handle */
	di_node_t		nei_dinode;	/* controller devinfo node */
	nvme_identify_ctrl_t	*nei_idctl;	/* NVME_IOC_IDENTIFY result */
	nvme_version_t		nei_vers;	/* NVME_IOC_VERSION result */
	tnode_t			*nei_parent;	/* node the nvme node binds to */
	tnode_t			*nei_nvme;	/* nvme node, once bound */
	/*
	 * FMRI of the nvme node.  make_nvme_node() frees this nvlist before
	 * it returns, so it is only usable while that function is running.
	 */
	nvlist_t		*nei_nvme_fmri;
	const char		*nei_nvme_path;	/* devfs minor path of the ctrl */
	int			nei_fd;		/* open fd on the controller */
} nvme_enum_info_t;
83 
/*
 * Argument block for devlink_cb(); carries the module handle in and the
 * resulting logical disk name out.
 */
typedef struct devlink_arg {
	topo_mod_t		*dla_mod;	/* used for topo_mod_strdup() */
	/* copy of the devlink path, truncated at the slice suffix */
	char			*dla_logical_disk;
	/* size of the dla_logical_disk allocation (length + NUL) */
	uint_t			dla_strsz;
} devlink_arg_t;
89 
90 static int
91 devlink_cb(di_devlink_t dl, void *arg)
92 {
93 	devlink_arg_t *dlarg = (devlink_arg_t *)arg;
94 	topo_mod_t *mod = dlarg->dla_mod;
95 	const char *devpath;
96 	char *slice, *ctds;
97 
98 	if ((devpath = di_devlink_path(dl)) == NULL ||
99 	    (dlarg->dla_logical_disk = topo_mod_strdup(mod, devpath)) ==
100 	    NULL) {
101 		return (DI_WALK_TERMINATE);
102 	}
103 
104 	/*
105 	 * We need to keep track of the original string size before we
106 	 * truncate it with a NUL, so that we can free the right number of
107 	 * bytes when we're done, otherwise libumem will complain.
108 	 */
109 	dlarg->dla_strsz = strlen(dlarg->dla_logical_disk) + 1;
110 
111 	/* trim the slice off the public name */
112 	if (((ctds = strrchr(dlarg->dla_logical_disk, '/')) != NULL) &&
113 	    ((slice = strchr(ctds, 's')) != NULL))
114 		*slice = '\0';
115 
116 	return (DI_WALK_TERMINATE);
117 }
118 
119 static char *
120 get_logical_disk(topo_mod_t *mod, const char *devpath, uint_t *bufsz)
121 {
122 	di_devlink_handle_t devhdl;
123 	devlink_arg_t dlarg = { 0 };
124 	char *minorpath = NULL;
125 
126 	if (asprintf(&minorpath, "%s:a", devpath) < 0) {
127 		return (NULL);
128 	}
129 
130 	if ((devhdl = di_devlink_init(NULL, 0)) == DI_NODE_NIL) {
131 		topo_mod_dprintf(mod, "%s: di_devlink_init failed", __func__);
132 		free(minorpath);
133 		return (NULL);
134 	}
135 
136 	dlarg.dla_mod = mod;
137 
138 	(void) di_devlink_walk(devhdl, "^dsk/", minorpath, DI_PRIMARY_LINK,
139 	    &dlarg, devlink_cb);
140 
141 	(void) di_devlink_fini(&devhdl);
142 	free(minorpath);
143 
144 	*bufsz = dlarg.dla_strsz;
145 	return (dlarg.dla_logical_disk);
146 }
147 
148 static int
149 make_disk_node(nvme_enum_info_t *nvme_info, di_node_t dinode,
150     topo_instance_t inst)
151 {
152 	topo_mod_t *mod = nvme_info->nei_mod;
153 	nvlist_t *auth = NULL, *fmri = NULL;
154 	tnode_t *disk;
155 	char *rev = NULL, *model = NULL, *serial = NULL, *path;
156 	char *logical_disk = NULL, *devid, *manuf, *ctd = NULL;
157 	char *cap_bytes_str = NULL, full_path[MAXPATHLEN + 1];
158 	char *pname = topo_node_name(nvme_info->nei_parent);
159 	topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent);
160 	const char **ppaths = NULL;
161 	struct dk_minfo minfo;
162 	uint64_t cap_bytes;
163 	uint_t bufsz;
164 	int fd = -1, err, ret = -1, r;
165 
166 	if ((path = di_devfs_path(dinode)) == NULL) {
167 		topo_mod_dprintf(mod, "%s: failed to get dev path", __func__);
168 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
169 		return (ret);
170 	}
171 
172 	topo_mod_dprintf(mod, "%s: found nvme namespace: %s", __func__, path);
173 
174 	/*
175 	 * Issue the DKIOCGMEDIAINFO ioctl to get the capacity
176 	 */
177 	(void) snprintf(full_path, MAXPATHLEN, "/devices%s%s", path,
178 	    PHYS_EXTN);
179 	if ((fd = open(full_path, O_RDWR)) < 0 ||
180 	    ioctl(fd, DKIOCGMEDIAINFO, &minfo) < 0) {
181 		topo_mod_dprintf(mod, "failed to get blkdev capacity (%s)",
182 		    strerror(errno));
183 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
184 		goto error;
185 	}
186 
187 	cap_bytes = minfo.dki_lbsize * minfo.dki_capacity;
188 
189 	if (asprintf(&cap_bytes_str, "%" PRIu64, cap_bytes) < 0) {
190 		topo_mod_dprintf(mod, "%s: failed to alloc string", __func__);
191 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
192 		goto error;
193 	}
194 
195 	/*
196 	 * Gather the FRU identity information from the devinfo properties
197 	 */
198 	if (di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, DEVID_PROP_NAME,
199 	    &devid) == -1 ||
200 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_VENDOR_ID,
201 	    &manuf) == -1 ||
202 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_PRODUCT_ID,
203 	    &model) == -1 ||
204 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_REVISION_ID,
205 	    &rev) == -1 ||
206 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_SERIAL_NO,
207 	    &serial) == -1) {
208 		topo_mod_dprintf(mod, "%s: failed to lookup devinfo props on "
209 		    "%s", __func__, path);
210 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
211 		goto error;
212 	}
213 
214 	model = topo_mod_clean_str(mod, model);
215 	rev = topo_mod_clean_str(mod, rev);
216 	serial = topo_mod_clean_str(mod, serial);
217 
218 	/*
219 	 * Lookup the /dev/dsk/c#t#d# disk device name from the blkdev path
220 	 */
221 	if ((logical_disk = get_logical_disk(mod, path, &bufsz)) == NULL) {
222 		topo_mod_dprintf(mod, "failed to find logical disk");
223 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
224 		goto error;
225 	}
226 
227 	/*
228 	 * If we were able to look up the logical disk path for this namespace
229 	 * then set ctd to be that pathname, minus the "/dev/dsk/" portion.
230 	 */
231 	if ((ctd = strrchr(logical_disk, '/')) !=  NULL) {
232 		ctd = ctd + 1;
233 	} else {
234 		topo_mod_dprintf(mod, "malformed logical disk path: %s",
235 		    logical_disk);
236 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
237 		goto error;
238 	}
239 
240 	/*
241 	 * Build the FMRI and then bind the disk node to the parent nvme node.
242 	 */
243 	auth = topo_mod_auth(mod, nvme_info->nei_nvme);
244 	fmri = topo_mod_hcfmri(mod, nvme_info->nei_nvme, FM_HC_SCHEME_VERSION,
245 	    DISK, inst, NULL, auth, model, rev, serial);
246 
247 	if (fmri == NULL) {
248 		/* errno set */
249 		topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%" PRIu64
250 		    "/%s=0/%s=%" PRIu64, __func__, pname, pinst, NVME, DISK,
251 		    inst);
252 		goto error;
253 	}
254 	if ((disk = topo_node_bind(mod, nvme_info->nei_nvme, DISK, inst,
255 	    fmri)) == NULL) {
256 		/* errno set */
257 		topo_mod_dprintf(mod, "%s: bind failed for %s=%" PRIu64
258 		    "/%s=0/%s=%" PRIu64, __func__, pname, pinst, NVME, DISK,
259 		    inst);
260 		goto error;
261 	}
262 
263 	/* Create authority and system propgroups */
264 	topo_pgroup_hcset(disk, auth);
265 
266 	/*
267 	 * As the "disk" in this case is simply a logical construct
268 	 * representing an NVMe namespace, we inherit the FRU from the parent
269 	 * node.
270 	 */
271 	if (topo_node_fru_set(disk, NULL, 0, &err) != 0) {
272 		topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__,
273 		    topo_strerror(err));
274 		(void) topo_mod_seterrno(mod, err);
275 		goto error;
276 	}
277 
278 	if ((ppaths = topo_mod_zalloc(mod, sizeof (char *))) == NULL) {
279 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
280 		goto error;
281 	}
282 	ppaths[0] = path;
283 
284 	/*
285 	 * Create the "storage" and "io" property groups and then fill them
286 	 * with the standard set of properties for "disk" nodes.
287 	 */
288 	if (topo_pgroup_create(disk, &io_pgroup, &err) != 0 ||
289 	    topo_pgroup_create(disk, &storage_pgroup, &err) != 0) {
290 		topo_mod_dprintf(mod, "%s: failed to create propgroups: %s",
291 		    __func__, topo_strerror(err));
292 		(void) topo_mod_seterrno(mod, err);
293 		goto error;
294 	}
295 
296 	r = topo_prop_set_string(disk, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH,
297 	    TOPO_PROP_IMMUTABLE, path, &err);
298 
299 	r += topo_prop_set_string_array(disk, TOPO_PGROUP_IO,
300 	    TOPO_IO_PHYS_PATH, TOPO_PROP_IMMUTABLE, ppaths, 1, &err);
301 
302 	r += topo_prop_set_string(disk, TOPO_PGROUP_IO, TOPO_IO_DEVID,
303 	    TOPO_PROP_IMMUTABLE, devid, &err);
304 
305 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
306 	    TOPO_STORAGE_MANUFACTURER, TOPO_PROP_IMMUTABLE, manuf, &err);
307 
308 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
309 	    TOPO_STORAGE_CAPACITY, TOPO_PROP_IMMUTABLE, cap_bytes_str,
310 	    &err);
311 
312 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
313 	    TOPO_STORAGE_SERIAL_NUM, TOPO_PROP_IMMUTABLE, serial, &err);
314 
315 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
316 	    TOPO_STORAGE_MODEL, TOPO_PROP_IMMUTABLE, model, &err);
317 
318 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
319 	    TOPO_STORAGE_FIRMWARE_REV, TOPO_PROP_IMMUTABLE, rev, &err);
320 
321 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
322 	    TOPO_STORAGE_LOGICAL_DISK_NAME, TOPO_PROP_IMMUTABLE, ctd, &err);
323 
324 	if (r != 0) {
325 		topo_mod_dprintf(mod, "%s: failed to create properties: %s",
326 		    __func__, topo_strerror(err));
327 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
328 		goto error;
329 	}
330 
331 	ret = 0;
332 
333 error:
334 	free(cap_bytes_str);
335 	if (fd > 0)
336 		(void) close(fd);
337 	if (ppaths != NULL)
338 		topo_mod_free(mod, ppaths, sizeof (char *));
339 	di_devfs_path_free(path);
340 	nvlist_free(auth);
341 	nvlist_free(fmri);
342 	topo_mod_strfree(mod, rev);
343 	topo_mod_strfree(mod, model);
344 	topo_mod_strfree(mod, serial);
345 	topo_mod_free(mod, logical_disk, bufsz);
346 	return (ret);
347 }
348 
/*
 * Property group descriptor passed to topo_pgroup_create() when the
 * TOPO_PGROUP_NVME group is created on an nvme node.
 *
 * NOTE(review): the initializers after the group name look like name
 * stability, data stability and version, in that order -- confirm against
 * the definition of topo_pgroup_info_t.
 */
static const topo_pgroup_info_t nvme_pgroup = {
	TOPO_PGROUP_NVME,
	TOPO_STABILITY_PRIVATE,
	TOPO_STABILITY_PRIVATE,
	1
};
355 
356 
357 static int
358 make_nvme_node(nvme_enum_info_t *nvme_info)
359 {
360 	topo_mod_t *mod = nvme_info->nei_mod;
361 	nvlist_t *auth = NULL, *fmri = NULL, *fru;
362 	tnode_t *nvme;
363 	char raw_rev[NVME_FWVER_SZ + 1], raw_model[NVME_MODEL_SZ + 1];
364 	char raw_serial[NVME_SERIAL_SZ + 1];
365 	char *rev = NULL, *model = NULL, *serial = NULL, *vers = NULL;
366 	char *pname = topo_node_name(nvme_info->nei_parent);
367 	char *label = NULL;
368 	topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent);
369 	int err = 0, ret = -1;
370 	di_node_t cn;
371 	uint_t i;
372 
373 	/*
374 	 * The raw strings returned by the IDENTIFY CONTROLLER command are
375 	 * not NUL-terminated, so we fix that up.
376 	 */
377 	(void) strncpy(raw_rev, nvme_info->nei_idctl->id_fwrev, NVME_FWVER_SZ);
378 	raw_rev[NVME_FWVER_SZ] = '\0';
379 	(void) strncpy(raw_model, nvme_info->nei_idctl->id_model,
380 	    NVME_MODEL_SZ);
381 	raw_model[NVME_MODEL_SZ] = '\0';
382 	(void) strncpy(raw_serial, nvme_info->nei_idctl->id_serial,
383 	    NVME_SERIAL_SZ);
384 	raw_serial[NVME_SERIAL_SZ] = '\0';
385 
386 	/*
387 	 * Next we pass the strings through a function that sanitizes them of
388 	 * any characters that can't be used in an FMRI string.
389 	 */
390 	rev = topo_mod_clean_str(mod, raw_rev);
391 	model = topo_mod_clean_str(mod, raw_model);
392 	serial = topo_mod_clean_str(mod, raw_serial);
393 
394 	auth = topo_mod_auth(mod, nvme_info->nei_parent);
395 	fmri = topo_mod_hcfmri(mod, nvme_info->nei_parent, FM_HC_SCHEME_VERSION,
396 	    NVME, 0, NULL, auth, model, rev, serial);
397 
398 	if (fmri == NULL) {
399 		/* errno set */
400 		topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%" PRIu64
401 		    "/%s=0", __func__, pname, pinst, NVME);
402 		goto error;
403 	}
404 
405 	/*
406 	 * If our parent is a pciexfn node, then we need to create a nvme range
407 	 * underneath it to hold the nvme heirarchy.  For other cases, where
408 	 * enumeration is being driven by a topo map file, this range will have
409 	 * already been statically defined in the XML.
410 	 */
411 	if (strcmp(pname, PCIEX_FUNCTION) == 0) {
412 		if (topo_node_range_create(mod, nvme_info->nei_parent, NVME, 0,
413 		    0) < 0) {
414 			/* errno set */
415 			topo_mod_dprintf(mod, "%s: error creating %s range",
416 			    __func__, NVME);
417 			goto error;
418 		}
419 	}
420 
421 	/*
422 	 * Create a new topo node to represent the NVMe controller and bind it
423 	 * to the parent node.
424 	 */
425 	if ((nvme = topo_node_bind(mod, nvme_info->nei_parent, NVME, 0,
426 	    fmri)) == NULL) {
427 		/* errno set */
428 		topo_mod_dprintf(mod, "%s: bind failed for %s=%" PRIu64
429 		    "/%s=0", __func__, pname, pinst, NVME);
430 		goto error;
431 	}
432 	nvme_info->nei_nvme = nvme;
433 	nvme_info->nei_nvme_fmri = fmri;
434 
435 	/*
436 	 * If our parent node is a "pciexfn" node then this is a NVMe device on
437 	 * a PCIe AIC, so we inherit our parent's FRU.  Otherwise, we set the
438 	 * FRU to ourself.
439 	 */
440 	if (strcmp(topo_node_name(nvme_info->nei_parent), PCIEX_FUNCTION) == 0)
441 		fru = NULL;
442 	else
443 		fru = fmri;
444 
445 	if (topo_node_fru_set(nvme, fru, 0, &err) != 0) {
446 		topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__,
447 		    topo_strerror(err));
448 		(void) topo_mod_seterrno(mod, err);
449 		goto error;
450 	}
451 
452 	/*
453 	 * Clone the label from our parent node.  We can't inherit the property
454 	 * because the label prop is mutable on bay nodes and only immutable
455 	 * properties can be inherited.
456 	 */
457 	if ((topo_node_label(nvme_info->nei_parent, &label, &err) != 0 &&
458 	    err != ETOPO_PROP_NOENT) ||
459 	    topo_node_label_set(nvme, label, &err) != 0) {
460 		topo_mod_dprintf(mod, "%s: failed to set label: %s",
461 		    __func__, topo_strerror(err));
462 		(void) topo_mod_seterrno(mod, err);
463 		goto error;
464 	}
465 
466 	if (topo_pgroup_create(nvme, &nvme_pgroup, &err) != 0) {
467 		topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s",
468 		    __func__, TOPO_PGROUP_NVME, topo_strerror(err));
469 		(void) topo_mod_seterrno(mod, err);
470 		goto error;
471 	}
472 
473 	if (asprintf(&vers, "%u.%u", nvme_info->nei_vers.v_major,
474 	    nvme_info->nei_vers.v_minor) < 0) {
475 		topo_mod_dprintf(mod, "%s: failed to alloc string", __func__);
476 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
477 		goto error;
478 	}
479 	if (topo_prop_set_string(nvme, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER,
480 	    TOPO_PROP_IMMUTABLE, vers, &err) != 0) {
481 		topo_mod_dprintf(mod, "%s: failed to set %s/%s property",
482 		    __func__, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER);
483 		(void) topo_mod_seterrno(mod, err);
484 		goto error;
485 	}
486 
487 	if (topo_pgroup_create(nvme, &io_pgroup, &err) != 0) {
488 		topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s",
489 		    __func__, TOPO_PGROUP_IO, topo_strerror(err));
490 		(void) topo_mod_seterrno(mod, err);
491 		goto error;
492 	}
493 	if (topo_prop_set_string(nvme, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH,
494 	    TOPO_PROP_IMMUTABLE, nvme_info->nei_nvme_path, &err) != 0) {
495 		topo_mod_dprintf(mod, "%s: failed to set %s/%s property",
496 		    __func__, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH);
497 		(void) topo_mod_seterrno(mod, err);
498 		goto error;
499 	}
500 
501 	/*
502 	 * Create a child disk node for each namespace.
503 	 */
504 	if (topo_node_range_create(mod, nvme, DISK, 0,
505 	    (nvme_info->nei_idctl->id_nn - 1)) < 0) {
506 		/* errno set */
507 		topo_mod_dprintf(mod, "%s: error creating %s range", __func__,
508 		    DISK);
509 		goto error;
510 	}
511 
512 	for (i = 0, cn = di_child_node(nvme_info->nei_dinode);
513 	    cn != DI_NODE_NIL;
514 	    i++, cn = di_sibling_node(cn)) {
515 
516 		if (make_disk_node(nvme_info, cn, i) != 0) {
517 			char *path = di_devfs_path(cn);
518 			/*
519 			 * We note the failure, but attempt to forge ahead and
520 			 * enumerate any other namespaces.
521 			 */
522 			topo_mod_dprintf(mod, "%s: make_disk_node() failed "
523 			    "for %s\n", __func__,
524 			    path ? path : "unknown path");
525 			di_devfs_path_free(path);
526 		}
527 	}
528 	ret = 0;
529 
530 error:
531 	free(vers);
532 	nvlist_free(auth);
533 	nvlist_free(fmri);
534 	topo_mod_strfree(mod, rev);
535 	topo_mod_strfree(mod, model);
536 	topo_mod_strfree(mod, serial);
537 	topo_mod_strfree(mod, label);
538 	return (ret);
539 }
540 
/*
 * Argument passed through di_walk_minor() to discover_nvme_ctl().
 */
struct diwalk_arg {
	topo_mod_t	*diwk_mod;	/* topo module handle */
	tnode_t		*diwk_parent;	/* node the nvme node is created under */
};
545 
546 /*
547  * This function gathers identity information from the NVMe controller and
548  * stores it in a struct.  This struct is passed to make_nvme_node(), which
549  * does the actual topo node creation.
550  */
551 static int
552 discover_nvme_ctl(di_node_t node, di_minor_t minor, void *arg)
553 {
554 	struct diwalk_arg *wkarg = arg;
555 	topo_mod_t *mod = wkarg->diwk_mod;
556 	char *path = NULL, *devctl = NULL;
557 	nvme_ioctl_t nioc = { 0 };
558 	nvme_identify_ctrl_t *idctl = NULL;
559 	nvme_enum_info_t nvme_info = { 0 };
560 	int fd = -1, ret = DI_WALK_TERMINATE;
561 
562 	if ((path = di_devfs_minor_path(minor)) == NULL) {
563 		topo_mod_dprintf(mod, "failed to get minor path");
564 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
565 		return (ret);
566 	}
567 
568 	topo_mod_dprintf(mod, "%s=%" PRIu64 ": found nvme controller: %s",
569 	    topo_node_name(wkarg->diwk_parent),
570 	    topo_node_instance(wkarg->diwk_parent), path);
571 
572 	if (asprintf(&devctl, "/devices%s", path) < 0) {
573 		topo_mod_dprintf(mod, "failed to alloc string");
574 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
575 		goto error;
576 	}
577 
578 	if ((fd = open(devctl, O_RDWR)) < 0) {
579 		topo_mod_dprintf(mod, "failed to open %s", devctl);
580 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
581 		goto error;
582 	}
583 	if ((idctl = topo_mod_zalloc(mod, NVME_IDENTIFY_BUFSIZE)) == NULL) {
584 		topo_mod_dprintf(mod, "zalloc failed");
585 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
586 		goto error;
587 	}
588 	nioc.n_len = NVME_IDENTIFY_BUFSIZE;
589 	nioc.n_buf = (uintptr_t)idctl;
590 	nioc.n_arg = NVME_IDENTIFY_CTRL;
591 
592 	if (ioctl(fd, NVME_IOC_IDENTIFY, &nioc) != 0) {
593 		topo_mod_dprintf(mod, "NVME_IOC_IDENTIFY ioctl "
594 		    "failed: %s", strerror(errno));
595 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
596 		goto error;
597 	}
598 
599 	nioc.n_len = sizeof (nvme_version_t);
600 	nioc.n_buf = (uintptr_t)&nvme_info.nei_vers;
601 	nioc.n_arg = 0;
602 
603 	if (ioctl(fd, NVME_IOC_VERSION, &nioc) != 0) {
604 		topo_mod_dprintf(mod, "NVME_IOC_VERSION ioctl failed: %s",
605 		    strerror(errno));
606 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
607 		goto error;
608 	}
609 
610 	nvme_info.nei_mod = mod;
611 	nvme_info.nei_nvme_path = path;
612 	nvme_info.nei_dinode = node;
613 	nvme_info.nei_idctl = idctl;
614 	nvme_info.nei_parent = wkarg->diwk_parent;
615 	nvme_info.nei_fd = fd;
616 
617 	if (make_nvme_node(&nvme_info) != 0) {
618 		/* errno set */
619 		goto error;
620 	}
621 
622 	ret = DI_WALK_CONTINUE;
623 
624 error:
625 	if (fd > 0)
626 		(void) close(fd);
627 	di_devfs_path_free(path);
628 	free(devctl);
629 	if (idctl != NULL)
630 		topo_mod_free(mod, idctl, NVME_IDENTIFY_BUFSIZE);
631 	return (ret);
632 }
633 
634 int
635 disk_nvme_enum_disk(topo_mod_t *mod, tnode_t *pnode)
636 {
637 	char *parent = NULL;
638 	int err;
639 	di_node_t devtree;
640 	di_node_t dnode;
641 	struct diwalk_arg wkarg = { 0 };
642 	int ret = -1;
643 
644 	/*
645 	 * Lookup a property containing the devfs path of the parent PCIe
646 	 * device of the NVMe device we're attempting to enumerate.  This
647 	 * property is hard-coded in per-platform topo XML maps that are
648 	 * delivered with the OS.  This hard-coded path allows topo to map a
649 	 * given NVMe controller to a physical location (bay or slot) on the
650 	 * platform, when generating the topo snapshot.
651 	 */
652 	if (topo_prop_get_string(pnode, TOPO_PGROUP_BINDING,
653 	    TOPO_BINDING_PARENT_DEV, &parent, &err) != 0) {
654 		topo_mod_dprintf(mod, "parent node was missing nvme binding "
655 		    "properties\n");
656 		(void) topo_mod_seterrno(mod, err);
657 		goto out;
658 	}
659 	if ((devtree = topo_mod_devinfo(mod)) == DI_NODE_NIL) {
660 		topo_mod_dprintf(mod, "failed to get devinfo snapshot");
661 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
662 		goto out;
663 	}
664 
665 	/*
666 	 * Walk the devinfo tree looking NVMe devices. For each NVMe device,
667 	 * check if the devfs path of the parent matches the one specified in
668 	 * TOPO_BINDING_PARENT_DEV.
669 	 */
670 	wkarg.diwk_mod = mod;
671 	wkarg.diwk_parent = pnode;
672 	dnode = di_drv_first_node(NVME_DRV, devtree);
673 	while (dnode != DI_NODE_NIL) {
674 		char *path;
675 
676 		if ((path = di_devfs_path(di_parent_node(dnode))) == NULL) {
677 			topo_mod_dprintf(mod, "failed to get dev path");
678 			(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
679 			goto out;
680 		}
681 		if (strcmp(parent, path) == 0) {
682 			if (di_walk_minor(dnode, DDI_NT_NVME_NEXUS, 0,
683 			    &wkarg, discover_nvme_ctl) < 0) {
684 				di_devfs_path_free(path);
685 				goto out;
686 			}
687 		}
688 		di_devfs_path_free(path);
689 		dnode = di_drv_next_node(dnode);
690 	}
691 	ret = 0;
692 
693 out:
694 	topo_mod_strfree(mod, parent);
695 	return (ret);
696 }
697