1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <unistd.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdarg.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <limits.h>
36 #include <alloca.h>
37 #include <kstat.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <libnvpair.h>
41 #include <sys/types.h>
42 #include <sys/bitmap.h>
43 #include <sys/processor.h>
44 #include <sys/param.h>
45 #include <sys/fm/protocol.h>
46 #include <sys/systeminfo.h>
47 #include <sys/mc.h>
48 #include <sys/mc_amd.h>
49 #include <fm/topo_mod.h>
50 
51 #include "chip.h"
52 
#ifndef MAX
/* Larger of two values; note that an argument may be evaluated twice. */
#define	MAX(a, b)	((a) > (b) ? (a) : (b))
#endif

/* Upper bound of the dimm instance range created in dimm_create(). */
#define	MAX_DIMMNUM	7
/* Upper bound of the chip-select instance range created in cs_create(). */
#define	MAX_CSNUM	7
59 
60 /*
61  * Enumerates the processing chips, or sockets, (as distinct from cores) in a
62  * system.  For each chip found, the necessary nodes (one or more cores, and
63  * possibly a memory controller) are constructed underneath.
64  */
65 
/* Enumeration entry point; handed to libtopo through chip_ops below. */
static int chip_enum(topo_mod_t *, tnode_t *, const char *, topo_instance_t,
    topo_instance_t, void *, void *);

/* ASRU computation method; attached to rank nodes through rank_methods. */
static int mem_asru_compute(topo_mod_t *, tnode_t *, topo_version_t,
    nvlist_t *, nvlist_t **);

/* Module operations and the info passed to topo_mod_register(). */
static const topo_modops_t chip_ops =
	{ chip_enum, NULL};
static const topo_modinfo_t chip_info =
	{ CHIP_NODE_NAME, FM_FMRI_SCHEME_HC, CHIP_VERSION, &chip_ops };

/*
 * Property group descriptors, one for each kind of node this module
 * creates.  All of them are declared with private stability.
 */
static const topo_pgroup_info_t cs_pgroup =
	{ PGNAME(CS), TOPO_STABILITY_PRIVATE, TOPO_STABILITY_PRIVATE, 1 };
static const topo_pgroup_info_t dimm_pgroup =
	{ PGNAME(DIMM), TOPO_STABILITY_PRIVATE, TOPO_STABILITY_PRIVATE, 1 };
static const topo_pgroup_info_t mc_pgroup =
	{ PGNAME(MCT), TOPO_STABILITY_PRIVATE, TOPO_STABILITY_PRIVATE, 1 };
static const topo_pgroup_info_t chip_pgroup =
	{ PGNAME(CHIP), TOPO_STABILITY_PRIVATE, TOPO_STABILITY_PRIVATE, 1 };
static const topo_pgroup_info_t cpu_pgroup =
	{ PGNAME(CPU), TOPO_STABILITY_PRIVATE, TOPO_STABILITY_PRIVATE, 1 };
static const topo_pgroup_info_t rank_pgroup =
	{ PGNAME(RANK), TOPO_STABILITY_PRIVATE, TOPO_STABILITY_PRIVATE, 1 };
static const topo_pgroup_info_t chan_pgroup =
	{ PGNAME(CHAN), TOPO_STABILITY_PRIVATE, TOPO_STABILITY_PRIVATE, 1 };

/* Methods registered on every rank node by rank_create(). */
const topo_method_t rank_methods[] = {
	{ TOPO_METH_ASRU_COMPUTE, TOPO_METH_ASRU_COMPUTE_DESC,
	    TOPO_METH_ASRU_COMPUTE_VERSION, TOPO_STABILITY_INTERNAL,
	    mem_asru_compute },
	{ NULL }
};

/*
 * Chip-select fmris, indexed by chip-select number.  Entries are stored by
 * cs_create(), read by rank_create() and freed and reset to NULL at the end
 * of mc_create().
 */
static nvlist_t *cs_fmri[MC_CHIP_NCS];
100 
101 static void
102 whinge(topo_mod_t *mod, int *nerr, const char *fmt, ...)
103 {
104 	va_list ap;
105 	char buf[160];
106 
107 	if (nerr != NULL)
108 		++*nerr;
109 
110 	va_start(ap, fmt);
111 	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
112 	va_end(ap);
113 
114 	topo_mod_dprintf(mod, "%s", buf);
115 }
116 
117 int
118 _topo_init(topo_mod_t *mod)
119 {
120 	chip_t *chip;
121 
122 	if (getenv("TOPOCHIPDBG"))
123 		topo_mod_setdebug(mod);
124 	topo_mod_dprintf(mod, "initializing chip enumerator\n");
125 
126 	if ((chip = topo_mod_zalloc(mod, sizeof (chip_t))) == NULL)
127 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
128 
129 	if ((chip->chip_kc = kstat_open()) == NULL) {
130 		whinge(mod, NULL, "kstat_open failed: %s\n",
131 		    strerror(errno));
132 		topo_mod_free(mod, chip, sizeof (chip_t));
133 		return (topo_mod_seterrno(mod, errno));
134 	}
135 
136 	chip->chip_ncpustats = sysconf(_SC_CPUID_MAX);
137 	if ((chip->chip_cpustats = topo_mod_zalloc(mod, (
138 	    chip->chip_ncpustats + 1) * sizeof (kstat_t *))) == NULL) {
139 		(void) kstat_close(chip->chip_kc);
140 		topo_mod_free(mod, chip, sizeof (chip_t));
141 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
142 	}
143 
144 	if (topo_mod_register(mod, &chip_info, TOPO_VERSION) != 0) {
145 		whinge(mod, NULL, "failed to register hc: "
146 		    "%s\n", topo_mod_errmsg(mod));
147 		topo_mod_free(mod, chip->chip_cpustats,
148 		    (chip->chip_ncpustats + 1) * sizeof (kstat_t *));
149 		(void) kstat_close(chip->chip_kc);
150 		topo_mod_free(mod, chip, sizeof (chip_t));
151 		return (-1); /* mod errno set */
152 	}
153 	topo_mod_setspecific(mod, (void *)chip);
154 
155 	return (0);
156 }
157 
158 void
159 _topo_fini(topo_mod_t *mod)
160 {
161 	chip_t *chip = topo_mod_getspecific(mod);
162 
163 	if (chip->chip_cpustats != NULL)
164 		topo_mod_free(mod, chip->chip_cpustats,
165 		    (chip->chip_ncpustats + 1) * sizeof (kstat_t *));
166 
167 	(void) kstat_close(chip->chip_kc);
168 	topo_mod_free(mod, chip, sizeof (chip_t));
169 
170 	topo_mod_unregister(mod);
171 }
172 
173 static int
174 add_kstat_strprop(topo_mod_t *mod, tnode_t *node, kstat_t *ksp,
175     const char *pgname, const char *pname)
176 {
177 	int err = 0;
178 	kstat_named_t *k;
179 
180 	if ((k = kstat_data_lookup(ksp, (char *)pname)) == NULL)
181 		return (-1);
182 
183 	if (topo_prop_set_string(node, pgname, pname,
184 	    TOPO_PROP_IMMUTABLE, k->value.str.addr.ptr, &err) == 0) {
185 		return (0);
186 	} else {
187 		whinge(mod, &err, "chip_strprop: failed to add '%s'\n",
188 		    pname);
189 		return (-1);
190 	}
191 }
192 
193 static int
194 add_kstat_longprop(topo_mod_t *mod, tnode_t *node, kstat_t *ksp,
195     const char *pgname, const char *pname)
196 {
197 	int err;
198 	kstat_named_t *k;
199 
200 	if ((k = kstat_data_lookup(ksp, (char *)pname)) == NULL)
201 		return (-1);
202 
203 	if (topo_prop_set_int32(node, pgname, pname,
204 	    TOPO_PROP_IMMUTABLE, k->value.l, &err) == 0) {
205 		return (0);
206 	} else {
207 		whinge(mod, &err, "chip_longprop: failed to add '%s'\n",
208 		    pname);
209 		return (-1);
210 	}
211 }
212 
213 static int
214 add_kstat_longprops(topo_mod_t *mod, tnode_t *node, kstat_t *ksp,
215     const char *pgname, ...)
216 {
217 	const char *pname;
218 	va_list ap;
219 	int nerr = 0;
220 
221 	va_start(ap, pgname);
222 	while ((pname = va_arg(ap, const char *)) != NULL) {
223 		if (add_kstat_longprop(mod, node, ksp, pgname, pname) != 0)
224 			nerr++;		/* have whinged elsewhere */
225 	}
226 	va_end(ap);
227 
228 	return (nerr == 0 ? 0 : -1);
229 }
230 
231 static int
232 mkrsrc(topo_mod_t *mod, tnode_t *pnode, const char *name, int inst,
233     nvlist_t *auth, nvlist_t **nvl)
234 {
235 	*nvl = topo_mod_hcfmri(mod, pnode, FM_HC_SCHEME_VERSION, name,
236 	    inst, NULL, auth, NULL, NULL, NULL);
237 	return (nvl != NULL ? 0 : -1);	/* caller must free nvlist */
238 }
239 
240 static nvlist_t *
241 cpu_fmri_create(topo_mod_t *mod, uint32_t cpuid, char *s, uint8_t cpumask)
242 {
243 	int err;
244 	nvlist_t *asru;
245 
246 	if (topo_mod_nvalloc(mod, &asru, NV_UNIQUE_NAME) != 0)
247 		return (NULL);
248 
249 	err = nvlist_add_uint8(asru, FM_VERSION, FM_CPU_SCHEME_VERSION);
250 	err |= nvlist_add_string(asru, FM_FMRI_SCHEME, FM_FMRI_SCHEME_CPU);
251 	err |= nvlist_add_uint32(asru, FM_FMRI_CPU_ID, cpuid);
252 	err |= nvlist_add_uint8(asru, FM_FMRI_CPU_MASK, cpumask);
253 	if (s != NULL)
254 		err |= nvlist_add_string(asru, FM_FMRI_CPU_SERIAL_ID, s);
255 	if (err != 0) {
256 		nvlist_free(asru);
257 		(void) topo_mod_seterrno(mod, EMOD_FMRI_NVL);
258 		return (NULL);
259 	}
260 
261 	return (asru);
262 }
263 
264 static nvlist_t *
265 mem_fmri_create(topo_mod_t *mod)
266 {
267 	nvlist_t *asru;
268 
269 	if (topo_mod_nvalloc(mod, &asru, NV_UNIQUE_NAME) != 0)
270 		return (NULL);
271 
272 	if (nvlist_add_string(asru, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0 ||
273 	    nvlist_add_uint8(asru, FM_VERSION, FM_MEM_SCHEME_VERSION) != 0) {
274 		nvlist_free(asru);
275 		return (NULL);
276 	}
277 
278 	return (asru);
279 }
280 
281 static int
282 cpu_create(topo_mod_t *mod, tnode_t *pnode, const char *name, int chipid,
283     chip_t *chip, nvlist_t *auth)
284 {
285 	kstat_named_t *k;
286 	nvlist_t *fmri, *asru;
287 	tnode_t *cnode;
288 	int err, nerr = 0;
289 	int clogid, cpuid;
290 
291 	if (topo_node_range_create(mod, pnode, name, 0,
292 	    chip->chip_ncpustats) < 0)
293 		return (-1);
294 
295 	for (cpuid = 0; cpuid <= chip->chip_ncpustats; cpuid++) {
296 		if (chip->chip_cpustats[cpuid] == NULL)
297 			continue;
298 
299 		/*
300 		 * The chip_id in the cpu_info kstat numbers the individual
301 		 * chips from 0 to #chips - 1.
302 		 */
303 		if ((k = kstat_data_lookup(chip->chip_cpustats[cpuid],
304 		    "chip_id")) == NULL) {
305 			whinge(mod, &nerr, "cpu_create: chip_id lookup via "
306 			    "kstats failed\n");
307 			continue;
308 		}
309 
310 		if (k->value.l != chipid)
311 			continue;	/* not an error */
312 
313 		/*
314 		 * The clog_id in the cpu_info kstat numbers the virtual
315 		 * processors of a single chip;  these may be separate
316 		 * processor cores, or they may be hardware threads/strands
317 		 * of individual cores.
318 		 *
319 		 * The core_id in the cpu_info kstat tells us which cpus
320 		 * share the same core - i.e., are hardware strands of the
321 		 * same core.  This enumerator does not distinguish stranded
322 		 * cores so core_id is unused.
323 		 */
324 		if ((k = kstat_data_lookup(chip->chip_cpustats[cpuid],
325 		    "clog_id")) == NULL) {
326 			whinge(mod, &nerr, "cpu_create: clog_id lookup via "
327 			    "kstats failed\n");
328 			continue;
329 		}
330 		clogid = k->value.l;
331 
332 		if (mkrsrc(mod, pnode, name, clogid, auth, &fmri) != 0) {
333 			whinge(mod, &nerr, "cpu_create: mkrsrc failed\n");
334 			continue;
335 		}
336 
337 		if ((cnode = topo_node_bind(mod, pnode, name, clogid, fmri))
338 		    == NULL) {
339 			whinge(mod, &nerr, "cpu_create: node bind failed\n");
340 			nvlist_free(fmri);
341 			continue;
342 		}
343 		nvlist_free(fmri);
344 
345 		if ((asru = cpu_fmri_create(mod, cpuid, NULL, 0)) != NULL) {
346 			(void) topo_node_asru_set(cnode, asru, 0, &err);
347 			nvlist_free(asru);
348 		} else {
349 			whinge(mod, &nerr, "cpu_create: cpu_fmri_create "
350 			    "failed\n");
351 		}
352 		(void) topo_node_fru_set(cnode, NULL, 0, &err);
353 
354 		(void) topo_pgroup_create(cnode, &cpu_pgroup, &err);
355 
356 		(void) topo_prop_set_uint32(cnode, PGNAME(CPU), "cpuid",
357 		    TOPO_PROP_IMMUTABLE, cpuid, &err);
358 
359 		if (add_kstat_longprops(mod, cnode, chip->chip_cpustats[cpuid],
360 		    PGNAME(CPU), CPU_CHIP_ID, CPU_CORE_ID, CPU_CLOG_ID,
361 		    NULL) != 0)
362 			nerr++;		/* have whinged elsewhere */
363 	}
364 
365 	return (nerr == 0 ? 0 : -1);
366 }
367 
368 static int
369 nvprop_add(topo_mod_t *mod, nvpair_t *nvp, const char *pgname, tnode_t *node)
370 {
371 	int success = 0;
372 	int err;
373 	char *pname = nvpair_name(nvp);
374 
375 	switch (nvpair_type(nvp)) {
376 	case DATA_TYPE_BOOLEAN_VALUE: {
377 		boolean_t val;
378 
379 		if (nvpair_value_boolean_value(nvp, &val) == 0 &&
380 		    topo_prop_set_string(node, pgname, pname,
381 		    TOPO_PROP_IMMUTABLE, val ? "true" : "false", &err) == 0)
382 			success = 1;
383 		break;
384 	}
385 
386 	case DATA_TYPE_UINT32: {
387 		uint32_t val;
388 
389 		if (nvpair_value_uint32(nvp, &val) == 0 &&
390 		    topo_prop_set_uint32(node, pgname, pname,
391 		    TOPO_PROP_IMMUTABLE, val, &err) == 0)
392 			success = 1;
393 		break;
394 	}
395 
396 	case DATA_TYPE_UINT64: {
397 		uint64_t val;
398 
399 		if (nvpair_value_uint64(nvp, &val) == 0 &&
400 		    topo_prop_set_uint64(node, pgname, pname,
401 		    TOPO_PROP_IMMUTABLE, val, &err) == 0)
402 			success = 1;
403 		break;
404 	}
405 
406 	case DATA_TYPE_UINT32_ARRAY: {
407 		uint32_t *arrp;
408 		uint_t nelem;
409 
410 		if (nvpair_value_uint32_array(nvp, &arrp, &nelem) == 0 &&
411 		    nelem > 0 && topo_prop_set_uint32_array(node, pgname, pname,
412 		    TOPO_PROP_IMMUTABLE, arrp, nelem, &err) == 0)
413 			success = 1;
414 		break;
415 	}
416 
417 	case DATA_TYPE_STRING: {
418 		char *str;
419 
420 		if (nvpair_value_string(nvp, &str) == 0 &&
421 		    topo_prop_set_string(node, pgname, pname,
422 		    TOPO_PROP_IMMUTABLE, str, &err) == 0)
423 			success = 1;
424 		break;
425 	}
426 
427 	default:
428 		whinge(mod, &err, "nvprop_add: Can't handle type %d for "
429 		    "'%s' in property group %s of %s node\n",
430 		    nvpair_type(nvp), pname, pgname, topo_node_name(node));
431 		break;
432 	}
433 
434 	return (success ? 0 : 1);
435 }
436 
437 static int
438 chip_htconfig(topo_mod_t *mod, tnode_t *cnode, nvlist_t *htnvl)
439 {
440 	nvpair_t *nvp;
441 	int nerr = 0;
442 
443 	if (strcmp(topo_node_name(cnode), CHIP_NODE_NAME) != 0) {
444 		whinge(mod, &nerr, "chip_htconfig: must pass a chip node!");
445 		return (-1);
446 	}
447 
448 	for (nvp = nvlist_next_nvpair(htnvl, NULL); nvp != NULL;
449 	    nvp = nvlist_next_nvpair(htnvl, nvp)) {
450 		if (nvprop_add(mod, nvp, PGNAME(CHIP), cnode) != 0)
451 			nerr++;
452 	}
453 
454 	return (nerr == 0 ? 0 : -1);
455 }
456 
457 static int
458 dramchan_create(topo_mod_t *mod, tnode_t *pnode, const char *name,
459     nvlist_t *auth)
460 {
461 	tnode_t *chnode;
462 	nvlist_t *fmri;
463 	char *socket;
464 	int i, nchan;
465 	int err, nerr = 0;
466 
467 	/*
468 	 * We will enumerate the number of channels present even if only
469 	 * channel A is in use (i.e., running in 64-bit mode).  Only
470 	 * the socket 754 package has a single channel.
471 	 */
472 	if (topo_prop_get_string(pnode, PGNAME(MCT), "socket",
473 	    &socket, &err) != 0)
474 		return (-1);
475 
476 	if (strcmp(socket, "Socket 754") == 0)
477 		nchan = 1;
478 	else
479 		nchan = 2;
480 
481 	topo_mod_strfree(mod, socket);
482 
483 	if (topo_node_range_create(mod, pnode, name, 0, nchan - 1) < 0)
484 		return (-1);
485 
486 	for (i = 0; i < nchan; i++) {
487 		if (mkrsrc(mod, pnode, name, i, auth, &fmri) != 0) {
488 			whinge(mod, &nerr, "dramchan_create: mkrsrc "
489 			    "failed\n");
490 			continue;
491 		}
492 
493 		if ((chnode = topo_node_bind(mod, pnode, name, i, fmri))
494 		    == NULL) {
495 			nvlist_free(fmri);
496 			whinge(mod, &nerr, "dramchan_create: node bind "
497 			    "failed\n");
498 			continue;
499 		}
500 
501 		nvlist_free(fmri);
502 
503 		(void) topo_pgroup_create(chnode, &chan_pgroup, &err);
504 
505 		(void) topo_prop_set_string(chnode, PGNAME(CHAN), "channel",
506 		    TOPO_PROP_IMMUTABLE, i == 0 ? "A" : "B", &err);
507 	}
508 
509 	return (nerr == 0 ? 0 : -1);
510 }
511 
512 static int
513 cs_create(topo_mod_t *mod, tnode_t *pnode, const char *name, nvlist_t *mc,
514     nvlist_t *auth)
515 {
516 	int i, err, nerr = 0;
517 	nvpair_t *nvp;
518 	tnode_t *csnode;
519 	nvlist_t *fmri, **csarr = NULL;
520 	uint64_t csnum;
521 	uint_t ncs;
522 
523 	if (nvlist_lookup_nvlist_array(mc, "cslist", &csarr, &ncs) != 0)
524 		return (-1);
525 
526 	if (ncs == 0)
527 		return (0);	/* no chip-selects configured on this node */
528 
529 	if (topo_node_range_create(mod, pnode, name, 0, MAX_CSNUM) < 0)
530 		return (-1);
531 
532 	for (i = 0; i < ncs; i++) {
533 		if (nvlist_lookup_uint64(csarr[i], "num", &csnum) != 0) {
534 			whinge(mod, &nerr, "cs_create: cs num property "
535 			    "missing\n");
536 			continue;
537 		}
538 
539 		if (mkrsrc(mod, pnode, name, csnum, auth, &fmri) != 0) {
540 			whinge(mod, &nerr, "cs_create: mkrsrc failed\n");
541 			continue;
542 		}
543 
544 		if ((csnode = topo_node_bind(mod, pnode, name, csnum, fmri))
545 		    == NULL) {
546 			nvlist_free(fmri);
547 			whinge(mod, &nerr, "cs_create: node bind failed\n");
548 			continue;
549 		}
550 
551 		cs_fmri[csnum] = fmri;	/* nvlist will be freed in mc_create */
552 
553 		(void) topo_node_asru_set(csnode, fmri, 0, &err);
554 
555 		(void) topo_pgroup_create(csnode, &cs_pgroup, &err);
556 
557 		for (nvp = nvlist_next_nvpair(csarr[i], NULL); nvp != NULL;
558 		    nvp = nvlist_next_nvpair(csarr[i], nvp)) {
559 			nerr += nvprop_add(mod, nvp, PGNAME(CS), csnode);
560 		}
561 	}
562 
563 	return (nerr == 0 ? 0 : -1);
564 }
565 
/*
 * Registered method for asru computation for rank nodes.  The 'node'
 * argument identifies the node for which we seek an asru.  The 'in'
 * argument is used to select which asru we will return, as follows:
 *
 * - the node name must be "dimm" or "rank"
 * - if 'in' is NULL then return any statically defined asru for this node
 * - if 'in' is an "hc" scheme fmri then we construct a "mem" scheme asru
 *   with unum being the hc path to the dimm or rank (this method is called
 *   as part of dynamic asru computation for rank nodes only, but dimm_create
 *   also calls it directly to construct a "mem" scheme asru for a dimm node)
 * - if 'in' additionally includes an hc-specific member which specifies
 *   asru-physaddr or asru-offset then these are included in the "mem" scheme
 *   asru as additional members physaddr and offset
 */
581 static int
582 mem_asru_create(topo_mod_t *mod, nvlist_t *fmri, nvlist_t **asru)
583 {
584 	int incl_pa = 0, incl_offset = 0;
585 	nvlist_t *hcsp, *ap;
586 	char *unum, *scheme;
587 	uint64_t pa, offset;
588 	int err;
589 
590 	if (nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &scheme) != 0 ||
591 	    strcmp(scheme, FM_FMRI_SCHEME_HC) != 0)
592 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
593 
594 	if (nvlist_lookup_nvlist(fmri, FM_FMRI_HC_SPECIFIC, &hcsp) == 0) {
595 		if (nvlist_lookup_uint64(hcsp, "asru-"FM_FMRI_MEM_PHYSADDR,
596 		    &pa) == 0)
597 			incl_pa = 1;
598 
599 		if (nvlist_lookup_uint64(hcsp, "asru-"FM_FMRI_MEM_OFFSET,
600 		    &offset) == 0)
601 			incl_offset = 1;
602 	}
603 
604 	/* use 'fmri' to obtain resource path;  could use node resource */
605 	if (topo_mod_nvl2str(mod, fmri, &unum) < 0)
606 		return (-1);  /* mod errno set */
607 
608 	if ((ap = mem_fmri_create(mod)) == NULL) {
609 		topo_mod_strfree(mod, unum);
610 		return (topo_mod_seterrno(mod, EMOD_FMRI_NVL));
611 	}
612 
613 	err = nvlist_add_string(ap, FM_FMRI_MEM_UNUM, unum);
614 	if (incl_pa)
615 		err |= nvlist_add_uint64(ap, FM_FMRI_MEM_PHYSADDR, pa);
616 	if (incl_offset)
617 		err |= nvlist_add_uint64(ap, FM_FMRI_MEM_OFFSET, offset);
618 
619 	topo_mod_strfree(mod, unum);
620 	if (err != 0) {
621 		nvlist_free(ap);
622 		return (topo_mod_seterrno(mod, EMOD_FMRI_NVL));
623 	}
624 
625 	*asru = ap;
626 
627 	return (0);
628 }
629 
630 /*ARGSUSED*/
631 static int
632 mem_asru_compute(topo_mod_t *mod, tnode_t *node, topo_version_t version,
633     nvlist_t *in, nvlist_t **out)
634 {
635 	nvlist_t *asru;
636 	nvlist_t *args, *pargs;
637 	int err;
638 
639 	if (strcmp(topo_node_name(node), RANK_NODE_NAME) != 0 &&
640 	    strcmp(topo_node_name(node), DIMM_NODE_NAME) != 0)
641 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
642 
643 	if (nvlist_lookup_nvlist(in, TOPO_PROP_ARGS, &args) != 0)
644 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
645 
646 	if ((err = nvlist_lookup_nvlist(in, TOPO_PROP_PARGS, &pargs)) != 0) {
647 		if (err == ENOENT) {
648 			if (topo_mod_nvdup(mod, args, &asru) < 0)
649 				return (topo_mod_seterrno(mod, EMOD_NOMEM));
650 		} else {
651 			return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
652 		}
653 	} else if (mem_asru_create(mod, pargs, &asru) != 0) {
654 		return (-1); /* mod errno already set */
655 	}
656 
657 	if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) < 0) {
658 		nvlist_free(asru);
659 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
660 	}
661 
662 	err = nvlist_add_string(*out, TOPO_PROP_VAL_NAME, TOPO_PROP_ASRU);
663 	err |= nvlist_add_uint32(*out, TOPO_PROP_VAL_TYPE, TOPO_TYPE_FMRI);
664 	err |= nvlist_add_nvlist(*out, TOPO_PROP_VAL_VAL, asru);
665 	if (err != 0) {
666 		nvlist_free(asru);
667 		nvlist_free(*out);
668 		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
669 	}
670 
671 	nvlist_free(asru);
672 
673 	return (0);
674 }
675 
676 static int
677 rank_create(topo_mod_t *mod, tnode_t *pnode, nvlist_t *dimmnvl, nvlist_t *auth)
678 {
679 	uint64_t *csnumarr;
680 	char **csnamearr;
681 	uint_t ncs, ncsname;
682 	tnode_t *ranknode;
683 	nvlist_t *fmri, *pfmri = NULL;
684 	uint64_t dsz, rsz;
685 	int nerr = 0;
686 	int err;
687 	int i;
688 
689 	if (nvlist_lookup_uint64_array(dimmnvl, "csnums", &csnumarr,
690 	    &ncs) != 0 || nvlist_lookup_string_array(dimmnvl, "csnames",
691 	    &csnamearr, &ncsname) != 0 || ncs != ncsname) {
692 		whinge(mod, &nerr, "rank_create: "
693 		    "csnums/csnames extraction failed\n");
694 		    return (nerr);
695 	}
696 
697 	if (topo_node_resource(pnode, &pfmri, &err) < 0) {
698 		whinge(mod, &nerr, "rank_create: parent fmri lookup "
699 		    "failed\n");
700 		return (nerr);
701 	}
702 
703 	if (topo_node_range_create(mod, pnode, RANK_NODE_NAME, 0, ncs) < 0) {
704 		whinge(mod, &nerr, "rank_create: range create failed\n");
705 		nvlist_free(pfmri);
706 		return (nerr);
707 	}
708 
709 	if (topo_prop_get_uint64(pnode, PGNAME(DIMM), "size", &dsz,
710 	    &err) == 0) {
711 		rsz = dsz / ncs;
712 	} else {
713 		whinge(mod, &nerr, "rank_create: parent dimm has no size\n");
714 		return (nerr);
715 	}
716 
717 	for (i = 0; i < ncs; i++) {
718 		if (mkrsrc(mod, pnode, RANK_NODE_NAME, i, auth, &fmri) < 0) {
719 			whinge(mod, &nerr, "rank_create: mkrsrc failed\n");
720 			continue;
721 		}
722 
723 		if ((ranknode = topo_node_bind(mod, pnode, RANK_NODE_NAME, i,
724 		    fmri)) == NULL) {
725 			nvlist_free(fmri);
726 			whinge(mod, &nerr, "rank_create: node bind "
727 			    "failed\n");
728 			continue;
729 		}
730 
731 		nvlist_free(fmri);
732 
733 		(void) topo_node_fru_set(ranknode, pfmri, 0, &err);
734 
735 		/*
736 		 * If a rank is faulted the asru is the associated
737 		 * chip-select, but if a page within a rank is faulted
738 		 * the asru is just that page.  Hence the dual preconstructed
739 		 * and computed ASRU.
740 		 */
741 		if (topo_method_register(mod, ranknode, rank_methods) < 0)
742 			whinge(mod, &nerr, "rank_create: "
743 			    "topo_method_register failed");
744 
745 		(void) topo_node_asru_set(ranknode, cs_fmri[csnumarr[i]],
746 			    TOPO_ASRU_COMPUTE, &err);
747 
748 		(void) topo_pgroup_create(ranknode, &rank_pgroup, &err);
749 
750 		(void) topo_prop_set_uint64(ranknode, PGNAME(RANK), "size",
751 		    TOPO_PROP_IMMUTABLE, rsz, &err);
752 
753 		(void) topo_prop_set_string(ranknode, PGNAME(RANK), "csname",
754 		    TOPO_PROP_IMMUTABLE, csnamearr[i], &err);
755 
756 		(void) topo_prop_set_uint64(ranknode, PGNAME(RANK), "csnum",
757 		    TOPO_PROP_IMMUTABLE, csnumarr[i], &err);
758 	}
759 
760 	nvlist_free(pfmri);
761 
762 	return (nerr);
763 }
764 
765 static int
766 dimm_create(topo_mod_t *mod, tnode_t *pnode, const char *name, nvlist_t *mc,
767     nvlist_t *auth)
768 {
769 	int i, err, nerr = 0;
770 	nvpair_t *nvp;
771 	tnode_t *dimmnode;
772 	nvlist_t *fmri, *asru, **dimmarr = NULL;
773 	uint64_t num;
774 	uint_t ndimm;
775 
776 	if (nvlist_lookup_nvlist_array(mc, "dimmlist", &dimmarr, &ndimm) != 0) {
777 		whinge(mod, NULL, "dimm_create: dimmlist lookup failed\n");
778 		return (-1);
779 	}
780 
781 	if (ndimm == 0)
782 		return (0);	/* no dimms present on this node */
783 
784 	if (topo_node_range_create(mod, pnode, name, 0, MAX_DIMMNUM) < 0) {
785 		whinge(mod, NULL, "dimm_create: range create failed\n");
786 		return (-1);
787 	}
788 
789 	for (i = 0; i < ndimm; i++) {
790 		if (nvlist_lookup_uint64(dimmarr[i], "num", &num) != 0) {
791 			whinge(mod, &nerr, "dimm_create: dimm num property "
792 			    "missing\n");
793 			continue;
794 		}
795 
796 		if (mkrsrc(mod, pnode, name, num, auth, &fmri) < 0) {
797 			whinge(mod, &nerr, "dimm_create: mkrsrc failed\n");
798 			continue;
799 		}
800 
801 		if ((dimmnode = topo_node_bind(mod, pnode, name, num, fmri))
802 		    == NULL) {
803 			nvlist_free(fmri);
804 			whinge(mod, &nerr, "dimm_create: node bind "
805 			    "failed\n");
806 			continue;
807 		}
808 
809 		/*
810 		 * Use the mem computation method directly to publish the asru
811 		 * in the "mem" scheme.
812 		 */
813 		if (mem_asru_create(mod, fmri, &asru) == 0) {
814 			(void) topo_node_asru_set(dimmnode, asru, 0, &err);
815 			nvlist_free(asru);
816 		} else {
817 
818 			nvlist_free(fmri);
819 			whinge(mod, &nerr, "dimm_create: mem_asru_compute "
820 			    "failed\n");
821 			continue;
822 		}
823 
824 		(void) topo_node_fru_set(dimmnode, fmri, 0, &err);
825 
826 		nvlist_free(fmri);
827 
828 		(void) topo_pgroup_create(dimmnode, &dimm_pgroup, &err);
829 
830 		for (nvp = nvlist_next_nvpair(dimmarr[i], NULL); nvp != NULL;
831 		    nvp = nvlist_next_nvpair(dimmarr[i], nvp)) {
832 			if (nvpair_type(nvp) == DATA_TYPE_UINT64_ARRAY &&
833 			    strcmp(nvpair_name(nvp), "csnums") == 0 ||
834 			    nvpair_type(nvp) == DATA_TYPE_STRING_ARRAY &&
835 			    strcmp(nvpair_name(nvp), "csnames") == 0)
836 				continue;	/* used in rank_create() */
837 
838 			nerr += nvprop_add(mod, nvp, PGNAME(DIMM), dimmnode);
839 		}
840 
841 		nerr += rank_create(mod, dimmnode, dimmarr[i], auth);
842 	}
843 
844 	return (nerr == 0 ? 0 : -1);
845 }
846 
847 static nvlist_t *
848 mc_lookup_by_mcid(topo_mod_t *mod, topo_instance_t id)
849 {
850 	mc_snapshot_info_t mcs;
851 	void *buf = NULL;
852 	uint8_t ver;
853 
854 	nvlist_t *nvl = NULL;
855 	char path[64];
856 	int fd, err;
857 
858 	(void) snprintf(path, sizeof (path), "/dev/mc/mc%d", id);
859 	fd = open(path, O_RDONLY);
860 
861 	if (fd == -1) {
862 		/*
863 		 * Some v20z and v40z systems may have had the 3rd-party
864 		 * NWSnps packagae installed which installs a /dev/mc
865 		 * link.  So try again via /devices.
866 		 */
867 		(void) snprintf(path, sizeof (path),
868 		    "/devices/pci@0,0/pci1022,1102@%x,2:mc-amd",
869 		    MC_AMD_DEV_OFFSET + id);
870 		fd = open(path, O_RDONLY);
871 	}
872 
873 	if (fd == -1) {
874 		whinge(mod, NULL, "mc failed to open %s: %s\n",
875 		    path, strerror(errno));
876 		return (NULL);
877 	}
878 
879 	if (ioctl(fd, MC_IOC_SNAPSHOT_INFO, &mcs) == -1 ||
880 	    (buf = topo_mod_alloc(mod, mcs.mcs_size)) == NULL ||
881 	    ioctl(fd, MC_IOC_SNAPSHOT, buf) == -1) {
882 
883 		whinge(mod, NULL, "mc failed to snapshot %s: %s\n",
884 		    path, strerror(errno));
885 
886 		free(buf);
887 		(void) close(fd);
888 		return (NULL);
889 	}
890 
891 	(void) close(fd);
892 	err = nvlist_unpack(buf, mcs.mcs_size, &nvl, 0);
893 	topo_mod_free(mod, buf, mcs.mcs_size);
894 
895 
896 	if (nvlist_lookup_uint8(nvl, MC_NVLIST_VERSTR, &ver) != 0) {
897 		whinge(mod, NULL, "mc nvlist is not versioned\n");
898 		nvlist_free(nvl);
899 		return (NULL);
900 	} else if (ver != MC_NVLIST_VERS1) {
901 		whinge(mod, NULL, "mc nvlist version mismatch\n");
902 		nvlist_free(nvl);
903 		return (NULL);
904 	}
905 
906 	return (err ? NULL : nvl);
907 }
908 
909 static int
910 mc_create(topo_mod_t *mod, tnode_t *pnode, const char *name, nvlist_t *auth)
911 {
912 	int err, rc = 0;
913 	tnode_t *mcnode;
914 	nvlist_t *fmri;
915 	nvpair_t *nvp;
916 	nvlist_t *mc = NULL;
917 	int i;
918 
919 	if (mkrsrc(mod, pnode, name, 0, auth, &fmri) != 0) {
920 		whinge(mod, NULL, "mc_create: mkrsrc failed\n");
921 		return (-1);
922 	}
923 
924 	if (topo_node_range_create(mod, pnode, name, 0, 0) < 0) {
925 		nvlist_free(fmri);
926 		whinge(mod, NULL, "mc_create: node range create failed\n");
927 		return (-1);
928 	}
929 
930 	/*
931 	 * Gather and create memory controller topology
932 	 */
933 	if ((mc = mc_lookup_by_mcid(mod, topo_node_instance(pnode))) == NULL ||
934 	    (mcnode = topo_node_bind(mod, pnode,
935 	    name, 0, fmri)) == NULL) {
936 		if (mc != NULL)
937 			nvlist_free(mc);
938 		topo_node_range_destroy(pnode, name);
939 		nvlist_free(fmri);
940 		whinge(mod, NULL, "mc_create: mc lookup or bind failed\n");
941 		return (-1);
942 	}
943 
944 	(void) topo_node_fru_set(mcnode, NULL, 0, &err);
945 	nvlist_free(fmri);
946 
947 	/*
948 	 * Add memory controller properties
949 	 */
950 	(void) topo_pgroup_create(mcnode, &mc_pgroup, &err);
951 
952 	for (nvp = nvlist_next_nvpair(mc, NULL); nvp != NULL;
953 	    nvp = nvlist_next_nvpair(mc, nvp)) {
954 		char *name = nvpair_name(nvp);
955 		data_type_t type = nvpair_type(nvp);
956 
957 		if (type == DATA_TYPE_NVLIST_ARRAY &&
958 		    (strcmp(name, "cslist") == 0 ||
959 		    strcmp(name, "dimmlist") == 0)) {
960 			continue;
961 		} else if (type == DATA_TYPE_UINT8 &&
962 		    strcmp(name, MC_NVLIST_VERSTR) == 0) {
963 			continue;
964 		} else if (type == DATA_TYPE_NVLIST &&
965 		    strcmp(name, "htconfig") == 0) {
966 			nvlist_t *htnvl;
967 
968 			(void) nvpair_value_nvlist(nvp, &htnvl);
969 			if (chip_htconfig(mod, pnode, htnvl) != 0)
970 				rc = -1;
971 		} else {
972 			if (nvprop_add(mod, nvp, PGNAME(MCT), mcnode) != 0)
973 				rc = -1;
974 		}
975 	}
976 
977 	if (dramchan_create(mod, mcnode, CHAN_NODE_NAME, auth) != 0 ||
978 	    cs_create(mod, mcnode, CS_NODE_NAME, mc, auth) != 0 ||
979 	    dimm_create(mod, mcnode, DIMM_NODE_NAME, mc, auth) != 0)
980 		rc = -1;
981 
982 	/*
983 	 * Free the fmris for the chip-selects allocated in cs_create
984 	 */
985 	for (i = 0; i < MC_CHIP_NCS; i++) {
986 		if (cs_fmri[i] != NULL) {
987 			nvlist_free(cs_fmri[i]);
988 			cs_fmri[i] = NULL;
989 		}
990 	}
991 
992 	nvlist_free(mc);
993 	return (rc);
994 }
995 
996 static int
997 chip_create(topo_mod_t *mod, tnode_t *pnode, const char *name,
998     topo_instance_t min, topo_instance_t max, chip_t *chip, nvlist_t *auth)
999 {
1000 	int i, nerr = 0;
1001 	kstat_t *ksp;
1002 	ulong_t *chipmap;
1003 	tnode_t *cnode;
1004 	nvlist_t *fmri;
1005 
1006 	if ((chipmap = topo_mod_zalloc(mod, BT_BITOUL(max) *
1007 	    sizeof (ulong_t))) == NULL)
1008 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
1009 
1010 	/*
1011 	 * Read in all cpu_info kstats, for all chip ids.  The ks_instance
1012 	 * argument to kstat_lookup is the logical cpu_id - we will use this
1013 	 * in cpu_create.
1014 	 */
1015 	for (i = 0; i <= chip->chip_ncpustats; i++) {
1016 		if ((ksp = kstat_lookup(chip->chip_kc, "cpu_info", i, NULL)) ==
1017 		    NULL || kstat_read(chip->chip_kc, ksp, NULL) < 0)
1018 			continue;
1019 
1020 		chip->chip_cpustats[i] = ksp;
1021 	}
1022 
1023 	for (i = 0; i <= chip->chip_ncpustats; i++) {
1024 		kstat_named_t *k;
1025 		int err, chipid;
1026 
1027 		if ((ksp = chip->chip_cpustats[i]) == NULL)
1028 			continue;
1029 
1030 		if ((k = kstat_data_lookup(ksp, "chip_id")) == NULL) {
1031 			whinge(mod, &nerr, "chip_create: chip_id lookup "
1032 			    "via kstats failed\n");
1033 			continue;
1034 		}
1035 
1036 		chipid = k->value.l;
1037 		if (BT_TEST(chipmap, chipid))
1038 			continue;
1039 
1040 		if (chipid < min || chipid > max)
1041 			continue;
1042 
1043 		if (mkrsrc(mod, pnode, name, chipid, auth, &fmri) != 0) {
1044 			whinge(mod, &nerr, "chip_create: mkrsrc failed\n");
1045 			continue;
1046 		}
1047 
1048 		if ((cnode = topo_node_bind(mod, pnode, name, chipid, fmri))
1049 		    == NULL) {
1050 			nvlist_free(fmri);
1051 			whinge(mod, &nerr, "chip_create: node bind "
1052 			    "failed for chipid %d\n", chipid);
1053 			continue;
1054 		}
1055 		BT_SET(chipmap, chipid);
1056 
1057 		(void) topo_node_fru_set(cnode, fmri, 0, &err);
1058 
1059 		nvlist_free(fmri);
1060 
1061 		(void) topo_pgroup_create(cnode, &chip_pgroup, &err);
1062 		if (add_kstat_strprop(mod, cnode, ksp, PGNAME(CHIP),
1063 		    CHIP_VENDOR_ID) != 0)
1064 			nerr++;		/* have whinged elsewhere */
1065 
1066 		if (add_kstat_longprops(mod, cnode, ksp, PGNAME(CHIP),
1067 		    CHIP_FAMILY, CHIP_MODEL, CHIP_STEPPING, NULL) != 0)
1068 			nerr++;		/* have whinged elsewhere */
1069 
1070 		if (cpu_create(mod, cnode, CPU_NODE_NAME, chipid, chip, auth)
1071 		    != 0 || mc_create(mod, cnode, MCT_NODE_NAME, auth) != 0)
1072 			nerr++;		/* have whinged elsewhere */
1073 	}
1074 
1075 	topo_mod_free(mod, chipmap, BT_BITOUL(max) * sizeof (ulong_t));
1076 
1077 	if (nerr == 0) {
1078 		return (0);
1079 	} else {
1080 		(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
1081 		return (-1);
1082 	}
1083 }
1084 
1085 /*ARGSUSED*/
1086 static int
1087 chip_enum(topo_mod_t *mod, tnode_t *pnode, const char *name,
1088     topo_instance_t min, topo_instance_t max, void *arg, void *notused)
1089 {
1090 	int rv = 0;
1091 	chip_t *chip = (chip_t *)arg;
1092 	nvlist_t *auth = NULL;
1093 
1094 	auth = topo_mod_auth(mod, pnode);
1095 
1096 	if (strcmp(name, CHIP_NODE_NAME) == 0)
1097 		rv = chip_create(mod, pnode, name, min, max, chip, auth);
1098 
1099 	nvlist_free(auth);
1100 
1101 	return (rv);
1102 }
1103