xref: /dragonfly/sys/kern/subr_cpu_topology.c (revision 16d4386a)
1 /*
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  * 3. Neither the name of The DragonFly Project nor the names of its
15  *    contributors may be used to endorse or promote products derived
16  *    from this software without specific, prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/sysctl.h>
37 #include <sys/sbuf.h>
38 #include <sys/cpu_topology.h>
39 
40 #include <machine/smp.h>
41 
#ifndef NAPICID
#define NAPICID 256
#endif

/*
 * Parenthesize the expansions so they stay correct inside larger
 * expressions (e.g. INVALID_ID compared or multiplied without
 * surrounding parens at the use site).
 */
#define INDENT_BUF_SIZE (LEVEL_NO * 3)
#define INVALID_ID (-1)
48 
/* Per-cpu sysctl nodes and info */
struct per_cpu_sysctl_info {
	struct sysctl_ctx_list sysctl_ctx;	/* sysctl context for this cpu's subtree */
	struct sysctl_oid *sysctl_tree;		/* hw.cpu_topology.cpuN node */
	char cpu_name[32];			/* "cpuN" node label */
	int physical_id;			/* package/chip id; INVALID_ID if unknown */
	int core_id;				/* core id within the chip; INVALID_ID if unknown */
	int ht_id;				/* thread id within core */
	char physical_siblings[8*MAXCPU];	/* cpuset string of cpus on the same chip */
	char core_siblings[8*MAXCPU];		/* cpuset string of cpus on the same core */
};
typedef struct per_cpu_sysctl_info per_cpu_sysctl_info_t;
61 
static cpu_node_t cpu_topology_nodes[MAXCPU];	/* Memory for topology */
static cpu_node_t *cpu_root_node;		/* Root node pointer */

static struct sysctl_ctx_list cpu_topology_sysctl_ctx;
static struct sysctl_oid *cpu_topology_sysctl_tree;
static char cpu_topology_members[8*MAXCPU];	/* cpuset string of all topology members */
static per_cpu_sysctl_info_t *pcpu_sysctl;	/* kmalloc'd in init_pcpu_topology_sysctl() */
static void sbuf_print_cpuset(struct sbuf *sb, cpumask_t *mask);

/* Topology counts filled in during detection; exported via sysctl below */
int cpu_topology_levels_number = 1;
int cpu_topology_ht_ids;
int cpu_topology_core_ids;
int cpu_topology_phys_ids;
cpu_node_t *root_cpu_node;

MALLOC_DEFINE(M_PCPUSYS, "pcpusys", "pcpu sysctl topology");

SYSCTL_INT(_hw, OID_AUTO, cpu_topology_ht_ids, CTLFLAG_RW,
	   &cpu_topology_ht_ids, 0, "# of logical cores per real core");
SYSCTL_INT(_hw, OID_AUTO, cpu_topology_core_ids, CTLFLAG_RW,
	   &cpu_topology_core_ids, 0, "# of real cores per package");
SYSCTL_INT(_hw, OID_AUTO, cpu_topology_phys_ids, CTLFLAG_RW,
	   &cpu_topology_phys_ids, 0, "# of physical packages");
85 
86 /* Get the next valid apicid starting
87  * from current apicid (curr_apicid
88  */
89 static int
90 get_next_valid_apicid(int curr_apicid)
91 {
92 	int next_apicid = curr_apicid;
93 	do {
94 		next_apicid++;
95 	}
96 	while(get_cpuid_from_apicid(next_apicid) == -1 &&
97 	   next_apicid < NAPICID);
98 	if (next_apicid == NAPICID) {
99 		kprintf("Warning: No next valid APICID found. Returning -1\n");
100 		return -1;
101 	}
102 	return next_apicid;
103 }
104 
105 /* Generic topology tree. The parameters have the following meaning:
106  * - children_no_per_level : the number of children on each level
107  * - level_types : the type of the level (THREAD, CORE, CHIP, etc)
108  * - cur_level : the current level of the tree
109  * - node : the current node
110  * - last_free_node : the last free node in the global array.
111  * - cpuid : basicly this are the ids of the leafs
112  */
113 static void
114 build_topology_tree(int *children_no_per_level,
115    uint8_t *level_types,
116    int cur_level,
117    cpu_node_t *node,
118    cpu_node_t **last_free_node,
119    int *apicid)
120 {
121 	int i;
122 
123 	node->child_no = children_no_per_level[cur_level];
124 	node->type = level_types[cur_level];
125 	CPUMASK_ASSZERO(node->members);
126 	node->compute_unit_id = -1;
127 
128 	if (node->child_no == 0) {
129 		*apicid = get_next_valid_apicid(*apicid);
130 		CPUMASK_ASSBIT(node->members, get_cpuid_from_apicid(*apicid));
131 		return;
132 	}
133 
134 	if (node->parent_node == NULL)
135 		root_cpu_node = node;
136 
137 	for (i = 0; i < node->child_no; i++) {
138 		node->child_node[i] = *last_free_node;
139 		(*last_free_node)++;
140 
141 		node->child_node[i]->parent_node = node;
142 
143 		build_topology_tree(children_no_per_level,
144 		    level_types,
145 		    cur_level + 1,
146 		    node->child_node[i],
147 		    last_free_node,
148 		    apicid);
149 
150 		CPUMASK_ORMASK(node->members, node->child_node[i]->members);
151 	}
152 }
153 
154 #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL)
155 static void
156 migrate_elements(cpu_node_t **a, int n, int pos)
157 {
158 	int i;
159 
160 	for (i = pos; i < n - 1 ; i++) {
161 		a[i] = a[i+1];
162 	}
163 	a[i] = NULL;
164 }
165 #endif
166 
/* Build CPU topology. The detection is made by comparing the
 * chip, core and logical IDs of each CPU with the IDs of the
 * BSP. When we found a match, at that level the CPUs are siblings.
 */
static void
build_cpu_topology(int assumed_ncpus)
{
	int i;
	int BSPID = 0;
	int threads_per_core = 0;
	int cores_per_chip = 0;
	int chips_per_package = 0;
	int children_no_per_level[LEVEL_NO];
	uint8_t level_types[LEVEL_NO];
	int apicid = -1;
	cpu_node_t *root = &cpu_topology_nodes[0];
	cpu_node_t *last_free_node = root + 1;

	detect_cpu_topology();

	/*
	 * Assume that the topology is uniform.
	 * Find the number of siblings within the chip
	 * and within the core to build up the topology.
	 */
	for (i = 0; i < assumed_ncpus; i++) {
		cpumask_t mask;

		CPUMASK_ASSBIT(mask, i);

#if 0
		/* smp_active_mask has not been initialized yet, ignore */
		if (CPUMASK_TESTMASK(mask, smp_active_mask) == 0)
			continue;
#endif

		/* Count every hw thread that shares the BSP's chip ... */
		if (get_chip_ID(BSPID) != get_chip_ID(i))
			continue;
		++cores_per_chip;

		/* ... and, of those, the ones also sharing the BSP's core */
		if (get_core_number_within_chip(BSPID) ==
		    get_core_number_within_chip(i)) {
			++threads_per_core;
		}
	}

	/* cores_per_chip counted hw threads above; fold out HT siblings */
	cores_per_chip /= threads_per_core;
	chips_per_package = assumed_ncpus / (cores_per_chip * threads_per_core);

	kprintf("CPU Topology: cores_per_chip: %d; threads_per_core: %d; "
		"chips_per_package: %d;\n",
		cores_per_chip, threads_per_core, chips_per_package);

	if (threads_per_core > 1) { /* HT available - 4 levels */

		children_no_per_level[0] = chips_per_package;
		children_no_per_level[1] = cores_per_chip;
		children_no_per_level[2] = threads_per_core;
		children_no_per_level[3] = 0;	/* leaves */

		level_types[0] = PACKAGE_LEVEL;
		level_types[1] = CHIP_LEVEL;
		level_types[2] = CORE_LEVEL;
		level_types[3] = THREAD_LEVEL;

		build_topology_tree(children_no_per_level,
		    level_types,
		    0,
		    root,
		    &last_free_node,
		    &apicid);

		cpu_topology_levels_number = 4;

	} else if (cores_per_chip > 1) { /* No HT available - 3 levels */

		children_no_per_level[0] = chips_per_package;
		children_no_per_level[1] = cores_per_chip;
		children_no_per_level[2] = 0;	/* leaves */

		level_types[0] = PACKAGE_LEVEL;
		level_types[1] = CHIP_LEVEL;
		level_types[2] = CORE_LEVEL;

		build_topology_tree(children_no_per_level,
		    level_types,
		    0,
		    root,
		    &last_free_node,
		    &apicid);

		cpu_topology_levels_number = 3;

	} else { /* No HT and no Multi-Core - 2 levels */

		children_no_per_level[0] = chips_per_package;
		children_no_per_level[1] = 0;	/* leaves */

		level_types[0] = PACKAGE_LEVEL;
		level_types[1] = CHIP_LEVEL;

		build_topology_tree(children_no_per_level,
		    level_types,
		    0,
		    root,
		    &last_free_node,
		    &apicid);

		cpu_topology_levels_number = 2;

	}

	cpu_root_node = root;


#if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL)
	/*
	 * AMD compute-unit fixup: when fix_amd_topology() returns 0 the
	 * compute_unit_id fields have been populated.  Cores that share
	 * a compute unit are regrouped under a synthesized CORE_LEVEL
	 * node and demoted to THREAD_LEVEL.
	 */
	if (fix_amd_topology() == 0) {
		/* NOTE: this 'i' intentionally shadows the outer one */
		int visited[MAXCPU], i, j, pos, cpuid;
		cpu_node_t *leaf, *parent;

		bzero(visited, MAXCPU * sizeof(int));

		for (i = 0; i < assumed_ncpus; i++) {
			if (visited[i] == 0) {
				pos = 0;
				visited[i] = 1;
				leaf = get_cpu_node_by_cpuid(i);

				KASSERT(leaf != NULL, ("cpu %d NULL node", i));
				if (leaf->type == CORE_LEVEL) {
					parent = leaf->parent_node;

					/* Seed a new compute-unit node with this leaf */
					last_free_node->child_node[0] = leaf;
					last_free_node->child_no = 1;
					last_free_node->members = leaf->members;
					last_free_node->compute_unit_id = leaf->compute_unit_id;
					last_free_node->parent_node = parent;
					last_free_node->type = CORE_LEVEL;


					/* Pull in every sibling with the same compute unit id */
					for (j = 0; j < parent->child_no; j++) {
						if (parent->child_node[j] != leaf) {

							cpuid = BSFCPUMASK(parent->child_node[j]->members);
							if (visited[cpuid] == 0 &&
							    parent->child_node[j]->compute_unit_id == leaf->compute_unit_id) {

								last_free_node->child_node[last_free_node->child_no] = parent->child_node[j];
								last_free_node->child_no++;
								CPUMASK_ORMASK(last_free_node->members, parent->child_node[j]->members);

								parent->child_node[j]->type = THREAD_LEVEL;
								parent->child_node[j]->parent_node = last_free_node;
								visited[cpuid] = 1;

								/* Remove the sibling; j-- so the shifted-in slot is re-examined */
								migrate_elements(parent->child_node, parent->child_no, j);
								parent->child_no--;
								j--;
							}
						} else {
							pos = j;	/* remember leaf's slot for the splice below */
						}
					}
					/* Splice in the new node only if it actually grouped threads */
					if (last_free_node->child_no > 1) {
						parent->child_node[pos] = last_free_node;
						leaf->type = THREAD_LEVEL;
						leaf->parent_node = last_free_node;
						last_free_node++;
					}
				}
			}
		}
	}
#endif
}
342 
/* Recursive function helper to print the CPU topology tree.
 * 'buf' accumulates the indentation prefix; 'buf_len' is passed by
 * value so each recursion level appends two characters without
 * disturbing its siblings.  'last' selects the branch glyph.
 */
static void
print_cpu_topology_tree_sysctl_helper(cpu_node_t *node,
    struct sbuf *sb,
    char * buf,
    int buf_len,
    int last)
{
	int i;
	int bsr_member;

	/* Emit accumulated indentation, then this node's branch glyph */
	sbuf_bcat(sb, buf, buf_len);
	if (last) {
		sbuf_printf(sb, "\\-");
		/* children of a last sibling continue with blanks */
		buf[buf_len] = ' ';buf_len++;
		buf[buf_len] = ' ';buf_len++;
	} else {
		sbuf_printf(sb, "|-");
		/* children of a middle sibling keep the vertical bar */
		buf[buf_len] = '|';buf_len++;
		buf[buf_len] = ' ';buf_len++;
	}

	/* Any member cpu identifies the node; use the highest set bit */
	bsr_member = BSRCPUMASK(node->members);

	if (node->type == PACKAGE_LEVEL) {
		sbuf_printf(sb,"PACKAGE MEMBERS: ");
	} else if (node->type == CHIP_LEVEL) {
		sbuf_printf(sb,"CHIP ID %d: ",
			get_chip_ID(bsr_member));
	} else if (node->type == CORE_LEVEL) {
		/* compute_unit_id != -1 only on AMD compute-unit topologies */
		if (node->compute_unit_id != (uint8_t)-1) {
			sbuf_printf(sb,"Compute Unit ID %d: ",
				node->compute_unit_id);
		} else {
			sbuf_printf(sb,"CORE ID %d: ",
				get_core_number_within_chip(bsr_member));
		}
	} else if (node->type == THREAD_LEVEL) {
		if (node->compute_unit_id != (uint8_t)-1) {
			sbuf_printf(sb,"THREAD ID %d: ",
				get_core_number_within_chip(bsr_member));
		} else {
			sbuf_printf(sb,"THREAD ID %d: ",
				get_logical_CPU_number_within_core(bsr_member));
		}
	} else {
		sbuf_printf(sb,"UNKNOWN: ");
	}
	sbuf_print_cpuset(sb, &node->members);
	sbuf_printf(sb,"\n");

	/* Recurse; the final child closes this branch of the tree */
	for (i = 0; i < node->child_no; i++) {
		print_cpu_topology_tree_sysctl_helper(node->child_node[i],
		    sb, buf, buf_len, i == (node->child_no -1));
	}
}
399 
400 /* SYSCTL PROCEDURE for printing the CPU Topology tree */
401 static int
402 print_cpu_topology_tree_sysctl(SYSCTL_HANDLER_ARGS)
403 {
404 	struct sbuf *sb;
405 	int ret;
406 	char buf[INDENT_BUF_SIZE];
407 
408 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
409 
410 	sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND);
411 	if (sb == NULL) {
412 		return (ENOMEM);
413 	}
414 	sbuf_printf(sb,"\n");
415 	print_cpu_topology_tree_sysctl_helper(cpu_root_node, sb, buf, 0, 1);
416 
417 	sbuf_finish(sb);
418 
419 	ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
420 
421 	sbuf_delete(sb);
422 
423 	return ret;
424 }
425 
426 /* SYSCTL PROCEDURE for printing the CPU Topology level description */
427 static int
428 print_cpu_topology_level_description_sysctl(SYSCTL_HANDLER_ARGS)
429 {
430 	struct sbuf *sb;
431 	int ret;
432 
433 	sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND);
434 	if (sb == NULL)
435 		return (ENOMEM);
436 
437 	if (cpu_topology_levels_number == 4) /* HT available */
438 		sbuf_printf(sb, "0 - thread; 1 - core; 2 - socket; 3 - anything");
439 	else if (cpu_topology_levels_number == 3) /* No HT available */
440 		sbuf_printf(sb, "0 - core; 1 - socket; 2 - anything");
441 	else if (cpu_topology_levels_number == 2) /* No HT and no Multi-Core */
442 		sbuf_printf(sb, "0 - socket; 1 - anything");
443 	else
444 		sbuf_printf(sb, "Unknown");
445 
446 	sbuf_finish(sb);
447 
448 	ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
449 
450 	sbuf_delete(sb);
451 
452 	return ret;
453 }
454 
455 /* Find a cpu_node_t by a mask */
456 static cpu_node_t *
457 get_cpu_node_by_cpumask(cpu_node_t * node,
458 			cpumask_t mask) {
459 
460 	cpu_node_t * found = NULL;
461 	int i;
462 
463 	if (CPUMASK_CMPMASKEQ(node->members, mask))
464 		return node;
465 
466 	for (i = 0; i < node->child_no; i++) {
467 		found = get_cpu_node_by_cpumask(node->child_node[i], mask);
468 		if (found != NULL) {
469 			return found;
470 		}
471 	}
472 	return NULL;
473 }
474 
475 cpu_node_t *
476 get_cpu_node_by_cpuid(int cpuid) {
477 	cpumask_t mask;
478 
479 	CPUMASK_ASSBIT(mask, cpuid);
480 
481 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
482 
483 	return get_cpu_node_by_cpumask(cpu_root_node, mask);
484 }
485 
486 /* Get the mask of siblings for level_type of a cpuid */
487 cpumask_t
488 get_cpumask_from_level(int cpuid,
489 			uint8_t level_type)
490 {
491 	cpu_node_t * node;
492 	cpumask_t mask;
493 
494 	CPUMASK_ASSBIT(mask, cpuid);
495 
496 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
497 
498 	node = get_cpu_node_by_cpumask(cpu_root_node, mask);
499 
500 	if (node == NULL) {
501 		CPUMASK_ASSZERO(mask);
502 		return mask;
503 	}
504 
505 	while (node != NULL) {
506 		if (node->type == level_type) {
507 			return node->members;
508 		}
509 		node = node->parent_node;
510 	}
511 	CPUMASK_ASSZERO(mask);
512 
513 	return mask;
514 }
515 
516 static const cpu_node_t *
517 get_cpu_node_by_chipid2(const cpu_node_t *node, int chip_id)
518 {
519 	int cpuid;
520 
521 	if (node->type != CHIP_LEVEL) {
522 		const cpu_node_t *ret = NULL;
523 		int i;
524 
525 		for (i = 0; i < node->child_no; ++i) {
526 			ret = get_cpu_node_by_chipid2(node->child_node[i],
527 			    chip_id);
528 			if (ret != NULL)
529 				break;
530 		}
531 		return ret;
532 	}
533 
534 	cpuid = BSRCPUMASK(node->members);
535 	if (get_chip_ID(cpuid) == chip_id)
536 		return node;
537 	return NULL;
538 }
539 
540 const cpu_node_t *
541 get_cpu_node_by_chipid(int chip_id)
542 {
543 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
544 	return get_cpu_node_by_chipid2(cpu_root_node, chip_id);
545 }
546 
/* init pcpu_sysctl structure info */
static void
init_pcpu_topology_sysctl(int assumed_ncpus)
{
	struct sbuf sb;
	cpumask_t mask;
	int min_id = -1;	/* lowest chip id seen, -1 = none yet */
	int max_id = -1;	/* highest chip id seen, -1 = none yet */
	int i;
	int phys_id;

	pcpu_sysctl = kmalloc(sizeof(*pcpu_sysctl) * MAXCPU, M_PCPUSYS,
			      M_INTWAIT | M_ZERO);

	for (i = 0; i < assumed_ncpus; i++) {
		/* Format the "cpuN" label used for the sysctl node */
		sbuf_new(&sb, pcpu_sysctl[i].cpu_name,
		    sizeof(pcpu_sysctl[i].cpu_name), SBUF_FIXEDLEN);
		sbuf_printf(&sb,"cpu%d", i);
		sbuf_finish(&sb);


		/* Get physical siblings */
		mask = get_cpumask_from_level(i, CHIP_LEVEL);
		if (CPUMASK_TESTZERO(mask)) {
			/* cpu not found in the topology tree */
			pcpu_sysctl[i].physical_id = INVALID_ID;
			continue;
		}

		sbuf_new(&sb, pcpu_sysctl[i].physical_siblings,
		    sizeof(pcpu_sysctl[i].physical_siblings), SBUF_FIXEDLEN);
		sbuf_print_cpuset(&sb, &mask);
		sbuf_trim(&sb);
		sbuf_finish(&sb);

		/* Track the chip id range for the normalization below */
		phys_id = get_chip_ID(i);
		pcpu_sysctl[i].physical_id = phys_id;
		if (min_id < 0 || min_id > phys_id)
			min_id = phys_id;
		if (max_id < 0 || max_id < phys_id)
			max_id = phys_id;

		/* Get core siblings */
		mask = get_cpumask_from_level(i, CORE_LEVEL);
		if (CPUMASK_TESTZERO(mask)) {
			/* no CORE_LEVEL ancestor (2-level topology) */
			pcpu_sysctl[i].core_id = INVALID_ID;
			continue;
		}

		sbuf_new(&sb, pcpu_sysctl[i].core_siblings,
		    sizeof(pcpu_sysctl[i].core_siblings), SBUF_FIXEDLEN);
		sbuf_print_cpuset(&sb, &mask);
		sbuf_trim(&sb);
		sbuf_finish(&sb);

		/* Record ids and grow the exported maxima as needed */
		pcpu_sysctl[i].core_id = get_core_number_within_chip(i);
		if (cpu_topology_core_ids < pcpu_sysctl[i].core_id + 1)
			cpu_topology_core_ids = pcpu_sysctl[i].core_id + 1;

		pcpu_sysctl[i].ht_id = get_logical_CPU_number_within_core(i);
		if (cpu_topology_ht_ids < pcpu_sysctl[i].ht_id + 1)
			cpu_topology_ht_ids = pcpu_sysctl[i].ht_id + 1;
	}

	/*
	 * Normalize physical ids so they can be used by the VM system.
	 * Some systems number starting at 0 others number starting at 1.
	 */
	cpu_topology_phys_ids = max_id - min_id + 1;
	if (cpu_topology_phys_ids <= 0)		/* don't crash */
		cpu_topology_phys_ids = 1;
	for (i = 0; i < assumed_ncpus; i++) {
		/* modulo keeps INVALID_ID (-1) negative, so it stays invalid */
		pcpu_sysctl[i].physical_id %= cpu_topology_phys_ids;
	}
}
621 
/* Build SYSCTL structure for revealing
 * the CPU Topology to user-space.
 *
 * Creates hw.cpu_topology with "tree", "level_description" and
 * "members" entries, plus one hw.cpu_topology.cpuN subtree per cpu.
 * Requires init_pcpu_topology_sysctl() to have run first.
 */
static void
build_sysctl_cpu_topology(int assumed_ncpus)
{
	int i;
	struct sbuf sb;

	/* SYSCTL new leaf for "cpu_topology" */
	sysctl_ctx_init(&cpu_topology_sysctl_ctx);
	cpu_topology_sysctl_tree = SYSCTL_ADD_NODE(&cpu_topology_sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_hw),
	    OID_AUTO,
	    "cpu_topology",
	    CTLFLAG_RD, 0, "");

	/* SYSCTL cpu_topology "tree" entry */
	SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx,
	    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
	    OID_AUTO, "tree", CTLTYPE_STRING | CTLFLAG_RD,
	    NULL, 0, print_cpu_topology_tree_sysctl, "A",
	    "Tree print of CPU topology");

	/* SYSCTL cpu_topology "level_description" entry */
	SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx,
	    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
	    OID_AUTO, "level_description", CTLTYPE_STRING | CTLFLAG_RD,
	    NULL, 0, print_cpu_topology_level_description_sysctl, "A",
	    "Level description of CPU topology");

	/* SYSCTL cpu_topology "members" entry */
	sbuf_new(&sb, cpu_topology_members,
	    sizeof(cpu_topology_members), SBUF_FIXEDLEN);
	sbuf_print_cpuset(&sb, &cpu_root_node->members);
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	SYSCTL_ADD_STRING(&cpu_topology_sysctl_ctx,
	    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
	    OID_AUTO, "members", CTLFLAG_RD,
	    cpu_topology_members, 0,
	    "Members of the CPU Topology");

	/* SYSCTL per_cpu info */
	for (i = 0; i < assumed_ncpus; i++) {
		/* New leaf : hw.cpu_topology.cpux */
		sysctl_ctx_init(&pcpu_sysctl[i].sysctl_ctx);
		pcpu_sysctl[i].sysctl_tree = SYSCTL_ADD_NODE(&pcpu_sysctl[i].sysctl_ctx,
		    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
		    OID_AUTO,
		    pcpu_sysctl[i].cpu_name,
		    CTLFLAG_RD, 0, "");

		/* Check if the physical_id found is valid */
		if (pcpu_sysctl[i].physical_id == INVALID_ID) {
			/* cpu not in the topology tree: node stays empty */
			continue;
		}

		/* Add physical id info */
		SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx,
		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
		    OID_AUTO, "physical_id", CTLFLAG_RD,
		    &pcpu_sysctl[i].physical_id, 0,
		    "Physical ID");

		/* Add physical siblings */
		SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx,
		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
		    OID_AUTO, "physical_siblings", CTLFLAG_RD,
		    pcpu_sysctl[i].physical_siblings, 0,
		    "Physical siblings");

		/* Check if the core_id found is valid */
		if (pcpu_sysctl[i].core_id == INVALID_ID) {
			continue;
		}

		/* Add core id info */
		SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx,
		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
		    OID_AUTO, "core_id", CTLFLAG_RD,
		    &pcpu_sysctl[i].core_id, 0,
		    "Core ID");

		/*Add core siblings */
		SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx,
		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
		    OID_AUTO, "core_siblings", CTLFLAG_RD,
		    pcpu_sysctl[i].core_siblings, 0,
		    "Core siblings");
	}
}
714 
/*
 * Print a cpumask into sb in human-readable form, e.g. "cpus(0-3, 8) ".
 * Runs of consecutive cpu ids are collapsed into "b-e" ranges; a
 * pending range is flushed each time the run of consecutive ids breaks.
 */
static
void
sbuf_print_cpuset(struct sbuf *sb, cpumask_t *mask)
{
	int i;
	int b = -1;	/* start of the pending range; -1 = none open */
	int e = -1;	/* one past the end of the pending range */
	int more = 0;	/* at least one range already printed */

	sbuf_printf(sb, "cpus(");
	CPUSET_FOREACH(i, *mask) {
		if (b < 0) {
			/* first cpu seen: open a range */
			b = i;
			e = b + 1;
			continue;
		}
		if (e == i) {
			/* consecutive: extend the pending range */
			++e;
			continue;
		}
		/* gap: flush the pending range, then open a new one */
		if (more)
			sbuf_printf(sb, ", ");
		if (b == e - 1) {
			sbuf_printf(sb, "%d", b);
		} else {
			sbuf_printf(sb, "%d-%d", b, e - 1);
		}
		more = 1;
		b = i;
		e = b + 1;
	}
	/* flush the final pending range, if any */
	if (more)
		sbuf_printf(sb, ", ");
	if (b >= 0) {
		if (b == e - 1) {
			sbuf_printf(sb, "%d", b);
		} else {
			sbuf_printf(sb, "%d-%d", b, e - 1);
		}
	}
	sbuf_printf(sb, ") ");
}
757 
758 int
759 get_cpu_ht_id(int cpuid)
760 {
761 	if (pcpu_sysctl)
762 		return(pcpu_sysctl[cpuid].ht_id);
763 	return(0);
764 }
765 
766 int
767 get_cpu_core_id(int cpuid)
768 {
769 	if (pcpu_sysctl)
770 		return(pcpu_sysctl[cpuid].core_id);
771 	return(0);
772 }
773 
774 int
775 get_cpu_phys_id(int cpuid)
776 {
777 	if (pcpu_sysctl)
778 		return(pcpu_sysctl[cpuid].physical_id);
779 	return(0);
780 }
781 
782 /*
783  * Returns the highest amount of memory attached to any single node.
784  * Returns 0 if the system is not NUMA or only has one node.
785  *
786  * This function is used by the scheduler.
787  */
788 long
789 get_highest_node_memory(void)
790 {
791 	long highest = 0;
792 
793         if (cpu_root_node && cpu_root_node->type == PACKAGE_LEVEL &&
794 	    cpu_root_node->child_node[1]) {
795                 cpu_node_t *cpup;
796                 int i;
797 
798                 for (i = 0 ; i < MAXCPU && cpu_root_node->child_node[i]; ++i) {
799                         cpup = cpu_root_node->child_node[i];
800                         if (highest < cpup->phys_mem)
801                                 highest = cpup->phys_mem;
802                 }
803         }
804 	return highest;
805 }
806 
extern int naps;	/* number of application processors started during SMP boot */

/* Build the CPU Topology and SYSCTL Topology tree */
static void
init_cpu_topology(void)
{
	int assumed_ncpus;

	/* BSP plus all application processors */
	assumed_ncpus = naps + 1;

	build_cpu_topology(assumed_ncpus);
	init_pcpu_topology_sysctl(assumed_ncpus);
	build_sysctl_cpu_topology(assumed_ncpus);
}
SYSINIT(cpu_topology, SI_BOOT2_CPU_TOPOLOGY, SI_ORDER_FIRST,
    init_cpu_topology, NULL);
823