xref: /dragonfly/sys/kern/subr_cpu_topology.c (revision 16dd80e4)
1 /*
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  * 3. Neither the name of The DragonFly Project nor the names of its
15  *    contributors may be used to endorse or promote products derived
16  *    from this software without specific, prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/sysctl.h>
37 #include <sys/sbuf.h>
38 #include <sys/cpu_topology.h>
39 
40 #include <machine/smp.h>
41 
#ifndef NAPICID
#define NAPICID 256
#endif

/*
 * Parenthesize macro bodies so they expand safely in any expression
 * context (e.g. INVALID_ID in comparisons, INDENT_BUF_SIZE in arithmetic).
 */
#define INDENT_BUF_SIZE	(LEVEL_NO * 3)
#define INVALID_ID	(-1)
48 
/*
 * Per-cpu sysctl nodes and info.  One entry per cpu is allocated in
 * init_pcpu_topology_sysctl() and published under hw.cpu_topology.cpuN
 * by build_sysctl_cpu_topology().
 */
struct per_cpu_sysctl_info {
	struct sysctl_ctx_list sysctl_ctx;	/* sysctl context for this cpu's subtree */
	struct sysctl_oid *sysctl_tree;		/* the hw.cpu_topology.cpuN node */
	char cpu_name[32];			/* "cpuN" leaf name */
	int physical_id;			/* chip/package id, normalized later */
	int core_id;				/* core id within the chip */
	int ht_id;				/* thread id within core */
	char physical_siblings[8*MAXCPU];	/* printable cpuset sharing the chip */
	char core_siblings[8*MAXCPU];		/* printable cpuset sharing the core */
};
typedef struct per_cpu_sysctl_info per_cpu_sysctl_info_t;
61 
static cpu_node_t cpu_topology_nodes[MAXCPU];	/* Memory for topology */
static cpu_node_t *cpu_root_node;		/* Root node pointer */

static struct sysctl_ctx_list cpu_topology_sysctl_ctx;
static struct sysctl_oid *cpu_topology_sysctl_tree;
static char cpu_topology_members[8*MAXCPU];	/* printable cpuset of all cpus */
static per_cpu_sysctl_info_t *pcpu_sysctl;	/* allocated at boot, never freed */
static void sbuf_print_cpuset(struct sbuf *sb, cpumask_t *mask);

/* Exported topology summary; filled in during SI_BOOT2_CPU_TOPOLOGY */
int cpu_topology_levels_number = 1;	/* depth of the topology tree */
int cpu_topology_ht_ids;		/* # of logical cores per real core */
int cpu_topology_core_ids;		/* # of real cores per package */
int cpu_topology_phys_ids;		/* # of physical packages */
cpu_node_t *root_cpu_node;

MALLOC_DEFINE(M_PCPUSYS, "pcpusys", "pcpu sysctl topology");

SYSCTL_INT(_hw, OID_AUTO, cpu_topology_ht_ids, CTLFLAG_RW,
	   &cpu_topology_ht_ids, 0, "# of logical cores per real core");
SYSCTL_INT(_hw, OID_AUTO, cpu_topology_core_ids, CTLFLAG_RW,
	   &cpu_topology_core_ids, 0, "# of real cores per package");
SYSCTL_INT(_hw, OID_AUTO, cpu_topology_phys_ids, CTLFLAG_RW,
	   &cpu_topology_phys_ids, 0, "# of physical packages");
85 
86 /* Get the next valid apicid starting
87  * from current apicid (curr_apicid
88  */
89 static int
90 get_next_valid_apicid(int curr_apicid)
91 {
92 	int next_apicid = curr_apicid;
93 	do {
94 		next_apicid++;
95 	}
96 	while(get_cpuid_from_apicid(next_apicid) == -1 &&
97 	   next_apicid < NAPICID);
98 	if (next_apicid == NAPICID) {
99 		kprintf("Warning: No next valid APICID found. Returning -1\n");
100 		return -1;
101 	}
102 	return next_apicid;
103 }
104 
/* Generic topology tree builder. The parameters have the following meaning:
 * - children_no_per_level : the number of children on each level
 * - level_types : the type of the level (THREAD, CORE, CHIP, etc)
 * - cur_level : the current level of the tree
 * - node : the current node
 * - last_free_node : the next unused node in the global cpu_topology_nodes
 *   array; advanced as children are carved out of it
 * - apicid : in/out cursor over valid apicids; basically these become the
 *   ids of the leafs
 */
static void
build_topology_tree(int *children_no_per_level,
   uint8_t *level_types,
   int cur_level,
   cpu_node_t *node,
   cpu_node_t **last_free_node,
   int *apicid)
{
	int i;

	node->child_no = children_no_per_level[cur_level];
	node->type = level_types[cur_level];
	CPUMASK_ASSZERO(node->members);
	node->compute_unit_id = -1;

	/* Leaf: bind the cpu of the next valid apicid to this node */
	if (node->child_no == 0) {
		*apicid = get_next_valid_apicid(*apicid);
		CPUMASK_ASSBIT(node->members, get_cpuid_from_apicid(*apicid));
		return;
	}

	/* The parentless node is the root of the whole tree */
	if (node->parent_node == NULL)
		root_cpu_node = node;

	/* Allocate children from the global array and recurse */
	for (i = 0; i < node->child_no; i++) {
		node->child_node[i] = *last_free_node;
		(*last_free_node)++;

		node->child_node[i]->parent_node = node;

		build_topology_tree(children_no_per_level,
		    level_types,
		    cur_level + 1,
		    node->child_node[i],
		    last_free_node,
		    apicid);

		/* A node's member mask is the union of its children's */
		CPUMASK_ORMASK(node->members, node->child_node[i]->members);
	}
}
153 
154 #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL)
155 static void
156 migrate_elements(cpu_node_t **a, int n, int pos)
157 {
158 	int i;
159 
160 	for (i = pos; i < n - 1 ; i++) {
161 		a[i] = a[i+1];
162 	}
163 	a[i] = NULL;
164 }
165 #endif
166 
/* Build CPU topology. The detection is made by comparing the
 * chip, core and logical IDs of each CPU with the IDs of the
 * BSP. When we find a match, at that level the CPUs are siblings.
 */
static void
build_cpu_topology(int assumed_ncpus)
{
	int i;
	int BSPID = 0;			/* bootstrap processor is cpu 0 */
	int threads_per_core = 0;
	int cores_per_chip = 0;
	int chips_per_package = 0;
	int children_no_per_level[LEVEL_NO];
	uint8_t level_types[LEVEL_NO];
	int apicid = -1;		/* pre-incremented cursor for leaf assignment */
	cpu_node_t *root = &cpu_topology_nodes[0];
	cpu_node_t *last_free_node = root + 1;

	detect_cpu_topology();

	/*
	 * Assume that the topology is uniform.
	 * Find the number of siblings within chip
	 * and within core to build up the topology.
	 */
	for (i = 0; i < assumed_ncpus; i++) {
		cpumask_t mask;

		CPUMASK_ASSBIT(mask, i);

#if 0
		/* smp_active_mask has not been initialized yet, ignore */
		if (CPUMASK_TESTMASK(mask, smp_active_mask) == 0)
			continue;
#endif

		/* Count cpus sharing the BSP's chip (includes threads) ... */
		if (get_chip_ID(BSPID) != get_chip_ID(i))
			continue;
		++cores_per_chip;

		/* ... and cpus sharing the BSP's core within that chip */
		if (get_core_number_within_chip(BSPID) ==
		    get_core_number_within_chip(i)) {
			++threads_per_core;
		}
	}

	/*
	 * cores_per_chip counted every thread on the chip; divide out the
	 * thread count to get real cores.  BSP matches itself, so
	 * threads_per_core is at least 1.
	 */
	cores_per_chip /= threads_per_core;
	chips_per_package = assumed_ncpus / (cores_per_chip * threads_per_core);

	kprintf("CPU Topology: cores_per_chip: %d; threads_per_core: %d; "
		"chips_per_package: %d;\n",
		cores_per_chip, threads_per_core, chips_per_package);

	if (threads_per_core > 1) { /* HT available - 4 levels */

		children_no_per_level[0] = chips_per_package;
		children_no_per_level[1] = cores_per_chip;
		children_no_per_level[2] = threads_per_core;
		children_no_per_level[3] = 0;

		level_types[0] = PACKAGE_LEVEL;
		level_types[1] = CHIP_LEVEL;
		level_types[2] = CORE_LEVEL;
		level_types[3] = THREAD_LEVEL;

		build_topology_tree(children_no_per_level,
		    level_types,
		    0,
		    root,
		    &last_free_node,
		    &apicid);

		cpu_topology_levels_number = 4;

	} else if (cores_per_chip > 1) { /* No HT available - 3 levels */

		children_no_per_level[0] = chips_per_package;
		children_no_per_level[1] = cores_per_chip;
		children_no_per_level[2] = 0;

		level_types[0] = PACKAGE_LEVEL;
		level_types[1] = CHIP_LEVEL;
		level_types[2] = CORE_LEVEL;

		build_topology_tree(children_no_per_level,
		    level_types,
		    0,
		    root,
		    &last_free_node,
		    &apicid);

		cpu_topology_levels_number = 3;

	} else { /* No HT and no Multi-Core - 2 levels */

		children_no_per_level[0] = chips_per_package;
		children_no_per_level[1] = 0;

		level_types[0] = PACKAGE_LEVEL;
		level_types[1] = CHIP_LEVEL;

		build_topology_tree(children_no_per_level,
		    level_types,
		    0,
		    root,
		    &last_free_node,
		    &apicid);

		cpu_topology_levels_number = 2;

	}

	cpu_root_node = root;


#if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL)
	/*
	 * AMD compute-unit fixup: when fix_amd_topology() reports compute
	 * units (returns 0), group CORE_LEVEL leaves sharing a
	 * compute_unit_id under a new synthetic CORE_LEVEL node taken from
	 * last_free_node, demoting the grouped leaves to THREAD_LEVEL.
	 *
	 * NOTE(review): the inner 'i' shadows the outer loop variable; the
	 * outer value is never used afterwards, so this is benign.
	 */
	if (fix_amd_topology() == 0) {
		int visited[MAXCPU], i, j, pos, cpuid;
		cpu_node_t *leaf, *parent;

		bzero(visited, MAXCPU * sizeof(int));

		for (i = 0; i < assumed_ncpus; i++) {
			if (visited[i] == 0) {
				pos = 0;
				visited[i] = 1;
				leaf = get_cpu_node_by_cpuid(i);

				if (leaf->type == CORE_LEVEL) {
					parent = leaf->parent_node;

					/* Seed the synthetic node with this leaf */
					last_free_node->child_node[0] = leaf;
					last_free_node->child_no = 1;
					last_free_node->members = leaf->members;
					last_free_node->compute_unit_id = leaf->compute_unit_id;
					last_free_node->parent_node = parent;
					last_free_node->type = CORE_LEVEL;


					/* Pull siblings with the same compute unit under it */
					for (j = 0; j < parent->child_no; j++) {
						if (parent->child_node[j] != leaf) {

							cpuid = BSFCPUMASK(parent->child_node[j]->members);
							if (visited[cpuid] == 0 &&
							    parent->child_node[j]->compute_unit_id == leaf->compute_unit_id) {

								last_free_node->child_node[last_free_node->child_no] = parent->child_node[j];
								last_free_node->child_no++;
								CPUMASK_ORMASK(last_free_node->members, parent->child_node[j]->members);

								parent->child_node[j]->type = THREAD_LEVEL;
								parent->child_node[j]->parent_node = last_free_node;
								visited[cpuid] = 1;

								/* Compact parent's child array; re-test index j */
								migrate_elements(parent->child_node, parent->child_no, j);
								parent->child_no--;
								j--;
							}
						} else {
							pos = j;	/* remember the leaf's slot */
						}
					}
					/* Only splice in the synthetic node if it grouped anything */
					if (last_free_node->child_no > 1) {
						parent->child_node[pos] = last_free_node;
						leaf->type = THREAD_LEVEL;
						leaf->parent_node = last_free_node;
						last_free_node++;
					}
				}
			}
		}
	}
#endif
}
341 
/* Recursive function helper to print the CPU topology tree.
 *
 * 'buf' is a shared indentation buffer of INDENT_BUF_SIZE bytes; each
 * recursion level appends two characters ("| " or "  ") before recursing,
 * so the buffer holds at most 2 * tree-depth bytes (depth <= LEVEL_NO).
 * 'last' indicates this node is its parent's last child, which selects
 * the "\-" connector and blank continuation instead of "|-" / "| ".
 */
static void
print_cpu_topology_tree_sysctl_helper(cpu_node_t *node,
    struct sbuf *sb,
    char * buf,
    int buf_len,
    int last)
{
	int i;
	int bsr_member;

	/* Emit accumulated indentation, then this node's connector */
	sbuf_bcat(sb, buf, buf_len);
	if (last) {
		sbuf_printf(sb, "\\-");
		buf[buf_len] = ' ';buf_len++;
		buf[buf_len] = ' ';buf_len++;
	} else {
		sbuf_printf(sb, "|-");
		buf[buf_len] = '|';buf_len++;
		buf[buf_len] = ' ';buf_len++;
	}

	/* Use the highest-numbered member cpu to derive the node's IDs */
	bsr_member = BSRCPUMASK(node->members);

	if (node->type == PACKAGE_LEVEL) {
		sbuf_printf(sb,"PACKAGE MEMBERS: ");
	} else if (node->type == CHIP_LEVEL) {
		sbuf_printf(sb,"CHIP ID %d: ",
			get_chip_ID(bsr_member));
	} else if (node->type == CORE_LEVEL) {
		/* compute_unit_id != -1 only on AMD compute-unit topologies */
		if (node->compute_unit_id != (uint8_t)-1) {
			sbuf_printf(sb,"Compute Unit ID %d: ",
				node->compute_unit_id);
		} else {
			sbuf_printf(sb,"CORE ID %d: ",
				get_core_number_within_chip(bsr_member));
		}
	} else if (node->type == THREAD_LEVEL) {
		if (node->compute_unit_id != (uint8_t)-1) {
			sbuf_printf(sb,"THREAD ID %d: ",
				get_core_number_within_chip(bsr_member));
		} else {
			sbuf_printf(sb,"THREAD ID %d: ",
				get_logical_CPU_number_within_core(bsr_member));
		}
	} else {
		sbuf_printf(sb,"UNKNOWN: ");
	}
	sbuf_print_cpuset(sb, &node->members);
	sbuf_printf(sb,"\n");

	/* Recurse; the local buf_len keeps callers' indentation intact */
	for (i = 0; i < node->child_no; i++) {
		print_cpu_topology_tree_sysctl_helper(node->child_node[i],
		    sb, buf, buf_len, i == (node->child_no -1));
	}
}
398 
399 /* SYSCTL PROCEDURE for printing the CPU Topology tree */
400 static int
401 print_cpu_topology_tree_sysctl(SYSCTL_HANDLER_ARGS)
402 {
403 	struct sbuf *sb;
404 	int ret;
405 	char buf[INDENT_BUF_SIZE];
406 
407 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
408 
409 	sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND);
410 	if (sb == NULL) {
411 		return (ENOMEM);
412 	}
413 	sbuf_printf(sb,"\n");
414 	print_cpu_topology_tree_sysctl_helper(cpu_root_node, sb, buf, 0, 1);
415 
416 	sbuf_finish(sb);
417 
418 	ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
419 
420 	sbuf_delete(sb);
421 
422 	return ret;
423 }
424 
425 /* SYSCTL PROCEDURE for printing the CPU Topology level description */
426 static int
427 print_cpu_topology_level_description_sysctl(SYSCTL_HANDLER_ARGS)
428 {
429 	struct sbuf *sb;
430 	int ret;
431 
432 	sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND);
433 	if (sb == NULL)
434 		return (ENOMEM);
435 
436 	if (cpu_topology_levels_number == 4) /* HT available */
437 		sbuf_printf(sb, "0 - thread; 1 - core; 2 - socket; 3 - anything");
438 	else if (cpu_topology_levels_number == 3) /* No HT available */
439 		sbuf_printf(sb, "0 - core; 1 - socket; 2 - anything");
440 	else if (cpu_topology_levels_number == 2) /* No HT and no Multi-Core */
441 		sbuf_printf(sb, "0 - socket; 1 - anything");
442 	else
443 		sbuf_printf(sb, "Unknown");
444 
445 	sbuf_finish(sb);
446 
447 	ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
448 
449 	sbuf_delete(sb);
450 
451 	return ret;
452 }
453 
454 /* Find a cpu_node_t by a mask */
455 static cpu_node_t *
456 get_cpu_node_by_cpumask(cpu_node_t * node,
457 			cpumask_t mask) {
458 
459 	cpu_node_t * found = NULL;
460 	int i;
461 
462 	if (CPUMASK_CMPMASKEQ(node->members, mask))
463 		return node;
464 
465 	for (i = 0; i < node->child_no; i++) {
466 		found = get_cpu_node_by_cpumask(node->child_node[i], mask);
467 		if (found != NULL) {
468 			return found;
469 		}
470 	}
471 	return NULL;
472 }
473 
474 cpu_node_t *
475 get_cpu_node_by_cpuid(int cpuid) {
476 	cpumask_t mask;
477 
478 	CPUMASK_ASSBIT(mask, cpuid);
479 
480 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
481 
482 	return get_cpu_node_by_cpumask(cpu_root_node, mask);
483 }
484 
485 /* Get the mask of siblings for level_type of a cpuid */
486 cpumask_t
487 get_cpumask_from_level(int cpuid,
488 			uint8_t level_type)
489 {
490 	cpu_node_t * node;
491 	cpumask_t mask;
492 
493 	CPUMASK_ASSBIT(mask, cpuid);
494 
495 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
496 
497 	node = get_cpu_node_by_cpumask(cpu_root_node, mask);
498 
499 	if (node == NULL) {
500 		CPUMASK_ASSZERO(mask);
501 		return mask;
502 	}
503 
504 	while (node != NULL) {
505 		if (node->type == level_type) {
506 			return node->members;
507 		}
508 		node = node->parent_node;
509 	}
510 	CPUMASK_ASSZERO(mask);
511 
512 	return mask;
513 }
514 
515 static const cpu_node_t *
516 get_cpu_node_by_chipid2(const cpu_node_t *node, int chip_id)
517 {
518 	int cpuid;
519 
520 	if (node->type != CHIP_LEVEL) {
521 		const cpu_node_t *ret = NULL;
522 		int i;
523 
524 		for (i = 0; i < node->child_no; ++i) {
525 			ret = get_cpu_node_by_chipid2(node->child_node[i],
526 			    chip_id);
527 			if (ret != NULL)
528 				break;
529 		}
530 		return ret;
531 	}
532 
533 	cpuid = BSRCPUMASK(node->members);
534 	if (get_chip_ID(cpuid) == chip_id)
535 		return node;
536 	return NULL;
537 }
538 
539 const cpu_node_t *
540 get_cpu_node_by_chipid(int chip_id)
541 {
542 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
543 	return get_cpu_node_by_chipid2(cpu_root_node, chip_id);
544 }
545 
/* init pcpu_sysctl structure info.
 *
 * Allocates the per-cpu sysctl info array and fills in each cpu's name,
 * chip/core/thread ids and printable sibling sets.  A cpu whose chip (or
 * core) level cannot be resolved is marked INVALID_ID and the remaining
 * fields are intentionally left zeroed (the 'continue's below).
 */
static void
init_pcpu_topology_sysctl(int assumed_ncpus)
{
	struct sbuf sb;
	cpumask_t mask;
	int min_id = -1;	/* lowest physical (chip) id seen */
	int max_id = -1;	/* highest physical (chip) id seen */
	int i;
	int phys_id;

	pcpu_sysctl = kmalloc(sizeof(*pcpu_sysctl) * MAXCPU, M_PCPUSYS,
			      M_INTWAIT | M_ZERO);

	for (i = 0; i < assumed_ncpus; i++) {
		/* Fixed-size sbufs format directly into the struct fields */
		sbuf_new(&sb, pcpu_sysctl[i].cpu_name,
		    sizeof(pcpu_sysctl[i].cpu_name), SBUF_FIXEDLEN);
		sbuf_printf(&sb,"cpu%d", i);
		sbuf_finish(&sb);


		/* Get physical siblings */
		mask = get_cpumask_from_level(i, CHIP_LEVEL);
		if (CPUMASK_TESTZERO(mask)) {
			pcpu_sysctl[i].physical_id = INVALID_ID;
			continue;
		}

		sbuf_new(&sb, pcpu_sysctl[i].physical_siblings,
		    sizeof(pcpu_sysctl[i].physical_siblings), SBUF_FIXEDLEN);
		sbuf_print_cpuset(&sb, &mask);
		sbuf_trim(&sb);
		sbuf_finish(&sb);

		/* Track the raw chip-id range for normalization below */
		phys_id = get_chip_ID(i);
		pcpu_sysctl[i].physical_id = phys_id;
		if (min_id < 0 || min_id > phys_id)
			min_id = phys_id;
		if (max_id < 0 || max_id < phys_id)
			max_id = phys_id;

		/* Get core siblings */
		mask = get_cpumask_from_level(i, CORE_LEVEL);
		if (CPUMASK_TESTZERO(mask)) {
			pcpu_sysctl[i].core_id = INVALID_ID;
			continue;
		}

		sbuf_new(&sb, pcpu_sysctl[i].core_siblings,
		    sizeof(pcpu_sysctl[i].core_siblings), SBUF_FIXEDLEN);
		sbuf_print_cpuset(&sb, &mask);
		sbuf_trim(&sb);
		sbuf_finish(&sb);

		/* Track the maximum core / ht id seen across all cpus */
		pcpu_sysctl[i].core_id = get_core_number_within_chip(i);
		if (cpu_topology_core_ids < pcpu_sysctl[i].core_id + 1)
			cpu_topology_core_ids = pcpu_sysctl[i].core_id + 1;

		pcpu_sysctl[i].ht_id = get_logical_CPU_number_within_core(i);
		if (cpu_topology_ht_ids < pcpu_sysctl[i].ht_id + 1)
			cpu_topology_ht_ids = pcpu_sysctl[i].ht_id + 1;
	}

	/*
	 * Normalize physical ids so they can be used by the VM system.
	 * Some systems number starting at 0 others number starting at 1.
	 */
	cpu_topology_phys_ids = max_id - min_id + 1;
	if (cpu_topology_phys_ids <= 0)		/* don't crash */
		cpu_topology_phys_ids = 1;
	for (i = 0; i < assumed_ncpus; i++) {
		pcpu_sysctl[i].physical_id %= cpu_topology_phys_ids;
	}
}
620 
/* Build SYSCTL structure for revealing
 * the CPU Topology to user-space.
 *
 * Registers hw.cpu_topology with "tree", "level_description" and
 * "members" entries, plus one hw.cpu_topology.cpuN subtree per cpu.
 * Requires init_pcpu_topology_sysctl() to have populated pcpu_sysctl.
 */
static void
build_sysctl_cpu_topology(int assumed_ncpus)
{
	int i;
	struct sbuf sb;

	/* SYSCTL new leaf for "cpu_topology" */
	sysctl_ctx_init(&cpu_topology_sysctl_ctx);
	cpu_topology_sysctl_tree = SYSCTL_ADD_NODE(&cpu_topology_sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_hw),
	    OID_AUTO,
	    "cpu_topology",
	    CTLFLAG_RD, 0, "");

	/* SYSCTL cpu_topology "tree" entry */
	SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx,
	    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
	    OID_AUTO, "tree", CTLTYPE_STRING | CTLFLAG_RD,
	    NULL, 0, print_cpu_topology_tree_sysctl, "A",
	    "Tree print of CPU topology");

	/* SYSCTL cpu_topology "level_description" entry */
	SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx,
	    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
	    OID_AUTO, "level_description", CTLTYPE_STRING | CTLFLAG_RD,
	    NULL, 0, print_cpu_topology_level_description_sysctl, "A",
	    "Level description of CPU topology");

	/* SYSCTL cpu_topology "members" entry: pre-rendered static string */
	sbuf_new(&sb, cpu_topology_members,
	    sizeof(cpu_topology_members), SBUF_FIXEDLEN);
	sbuf_print_cpuset(&sb, &cpu_root_node->members);
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	SYSCTL_ADD_STRING(&cpu_topology_sysctl_ctx,
	    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
	    OID_AUTO, "members", CTLFLAG_RD,
	    cpu_topology_members, 0,
	    "Members of the CPU Topology");

	/* SYSCTL per_cpu info */
	for (i = 0; i < assumed_ncpus; i++) {
		/* New leaf : hw.cpu_topology.cpux */
		sysctl_ctx_init(&pcpu_sysctl[i].sysctl_ctx);
		pcpu_sysctl[i].sysctl_tree = SYSCTL_ADD_NODE(&pcpu_sysctl[i].sysctl_ctx,
		    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
		    OID_AUTO,
		    pcpu_sysctl[i].cpu_name,
		    CTLFLAG_RD, 0, "");

		/* Check if the physical_id found is valid */
		if (pcpu_sysctl[i].physical_id == INVALID_ID) {
			continue;
		}

		/* Add physical id info */
		SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx,
		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
		    OID_AUTO, "physical_id", CTLFLAG_RD,
		    &pcpu_sysctl[i].physical_id, 0,
		    "Physical ID");

		/* Add physical siblings */
		SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx,
		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
		    OID_AUTO, "physical_siblings", CTLFLAG_RD,
		    pcpu_sysctl[i].physical_siblings, 0,
		    "Physical siblings");

		/* Check if the core_id found is valid */
		if (pcpu_sysctl[i].core_id == INVALID_ID) {
			continue;
		}

		/* Add core id info */
		SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx,
		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
		    OID_AUTO, "core_id", CTLFLAG_RD,
		    &pcpu_sysctl[i].core_id, 0,
		    "Core ID");

		/* Add core siblings */
		SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx,
		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
		    OID_AUTO, "core_siblings", CTLFLAG_RD,
		    pcpu_sysctl[i].core_siblings, 0,
		    "Core siblings");
	}
}
713 
/*
 * Print a cpumask into 'sb' as "cpus(a-b, c, d-e) " with consecutive
 * cpu numbers compressed into ranges.  An empty mask prints "cpus() ".
 *
 * [b, e) is the pending run of consecutive cpus; 'more' records whether
 * a run has already been emitted (so a ", " separator is needed).
 */
static
void
sbuf_print_cpuset(struct sbuf *sb, cpumask_t *mask)
{
	int i;
	int b = -1;
	int e = -1;
	int more = 0;

	sbuf_printf(sb, "cpus(");
	CPUSET_FOREACH(i, *mask) {
		/* First member: open the initial run */
		if (b < 0) {
			b = i;
			e = b + 1;
			continue;
		}
		/* Consecutive member: extend the pending run */
		if (e == i) {
			++e;
			continue;
		}
		/* Gap: flush the pending run and start a new one at i */
		if (more)
			sbuf_printf(sb, ", ");
		if (b == e - 1) {
			sbuf_printf(sb, "%d", b);
		} else {
			sbuf_printf(sb, "%d-%d", b, e - 1);
		}
		more = 1;
		b = i;
		e = b + 1;
	}
	/* Flush the final pending run, if the mask was non-empty */
	if (more)
		sbuf_printf(sb, ", ");
	if (b >= 0) {
		if (b == e - 1) {
			sbuf_printf(sb, "%d", b);
		} else {
			sbuf_printf(sb, "%d-%d", b, e - 1);
		}
	}
	sbuf_printf(sb, ") ");
}
756 
757 int
758 get_cpu_ht_id(int cpuid)
759 {
760 	if (pcpu_sysctl)
761 		return(pcpu_sysctl[cpuid].ht_id);
762 	return(0);
763 }
764 
765 int
766 get_cpu_core_id(int cpuid)
767 {
768 	if (pcpu_sysctl)
769 		return(pcpu_sysctl[cpuid].core_id);
770 	return(0);
771 }
772 
773 int
774 get_cpu_phys_id(int cpuid)
775 {
776 	if (pcpu_sysctl)
777 		return(pcpu_sysctl[cpuid].physical_id);
778 	return(0);
779 }
780 
781 /*
782  * Returns the highest amount of memory attached to any single node.
783  * Returns 0 if the system is not NUMA or only has one node.
784  *
785  * This function is used by the scheduler.
786  */
787 long
788 get_highest_node_memory(void)
789 {
790 	long highest = 0;
791 
792         if (cpu_root_node && cpu_root_node->type == PACKAGE_LEVEL &&
793 	    cpu_root_node->child_node[1]) {
794                 cpu_node_t *cpup;
795                 int i;
796 
797                 for (i = 0 ; i < MAXCPU && cpu_root_node->child_node[i]; ++i) {
798                         cpup = cpu_root_node->child_node[i];
799                         if (highest < cpup->phys_mem)
800                                 highest = cpup->phys_mem;
801                 }
802         }
803 	return highest;
804 }
805 
extern int naps;

/* Build the CPU Topology and SYSCTL Topology tree */
static void
init_cpu_topology(void)
{
	int ncpus_assumed = naps + 1;	/* application processors plus the BSP */

	build_cpu_topology(ncpus_assumed);
	init_pcpu_topology_sysctl(ncpus_assumed);
	build_sysctl_cpu_topology(ncpus_assumed);
}
SYSINIT(cpu_topology, SI_BOOT2_CPU_TOPOLOGY, SI_ORDER_FIRST,
    init_cpu_topology, NULL);
822