xref: /dragonfly/sys/kern/subr_cpu_topology.c (revision 8af44722)
1 /*
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  * 3. Neither the name of The DragonFly Project nor the names of its
15  *    contributors may be used to endorse or promote products derived
16  *    from this software without specific, prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/sysctl.h>
37 #include <sys/sbuf.h>
38 #include <sys/cpu_topology.h>
39 
40 #include <machine/smp.h>
41 
/*
 * Upper bound (exclusive) on the APIC ids scanned by
 * get_next_valid_apicid(), unless a value was already defined.
 */
#ifndef NAPICID
#define NAPICID 256
#endif

/*
 * Size of the indentation scratch buffer used when printing the tree.
 * The replacement list is parenthesized so that the macro expands to a
 * single operand regardless of the surrounding operators.
 */
#define INDENT_BUF_SIZE (LEVEL_NO * 3)
/* Marks a physical/core id that could not be determined */
#define INVALID_ID (-1)
48 
/*
 * Per-cpu sysctl nodes and info.
 *
 * One entry exists per cpu.  The array is allocated and filled by
 * init_pcpu_topology_sysctl(); the sysctl nodes are attached by
 * build_sysctl_cpu_topology().
 */
struct per_cpu_sysctl_info {
	struct sysctl_ctx_list sysctl_ctx;	/* context the per-cpu oids are added to */
	struct sysctl_oid *sysctl_tree;		/* node created under hw.cpu_topology */
	char cpu_name[32];			/* node name, formatted as "cpu%d" */
	int physical_id;			/* chip id modulo cpu_topology_phys_ids, or INVALID_ID */
	int core_id;				/* core number within chip, or INVALID_ID */
	int ht_id;				/* thread id within core */
	char physical_siblings[8*MAXCPU];	/* printed cpuset of the CHIP_LEVEL siblings */
	char core_siblings[8*MAXCPU];		/* printed cpuset of the CORE_LEVEL siblings */
};
typedef struct per_cpu_sysctl_info per_cpu_sysctl_info_t;
61 
/* Memory for topology: backing storage the tree nodes are carved from */
__read_frequently static cpu_node_t cpu_topology_nodes[MAXCPU];
/* Root node pointer, assigned by build_cpu_topology() once the tree is built */
__read_frequently static cpu_node_t *cpu_root_node;

/* sysctl context and node for hw.cpu_topology */
static struct sysctl_ctx_list cpu_topology_sysctl_ctx;
static struct sysctl_oid *cpu_topology_sysctl_tree;
/* Printed cpuset of the root node, exported as hw.cpu_topology.members */
static char cpu_topology_members[8*MAXCPU];
/* Per-cpu info array, allocated by init_pcpu_topology_sysctl() */
static per_cpu_sysctl_info_t *pcpu_sysctl;
static void sbuf_print_cpuset(struct sbuf *sb, cpumask_t *mask);

/* Number of tree levels; build_cpu_topology() sets it to 2, 3 or 4 */
__read_frequently int cpu_topology_levels_number = 1;
/* Highest thread id within a core + 1, from init_pcpu_topology_sysctl() */
__read_frequently int cpu_topology_ht_ids;
/* Highest core id within a chip + 1, from init_pcpu_topology_sysctl() */
__read_frequently int cpu_topology_core_ids;
/* Span of the physical (chip) ids, at least 1 */
__read_frequently int cpu_topology_phys_ids;
/* Set by build_topology_tree() to the non-leaf node that has no parent */
__read_frequently cpu_node_t *root_cpu_node;

MALLOC_DEFINE(M_PCPUSYS, "pcpusys", "pcpu sysctl topology");

/*
 * Static hw.* exports of the three counters above.
 * NOTE(review): these are registered CTLFLAG_RW although the values are
 * derived from the detected topology - confirm that writes are intended.
 */
SYSCTL_INT(_hw, OID_AUTO, cpu_topology_ht_ids, CTLFLAG_RW,
	   &cpu_topology_ht_ids, 0, "# of logical cores per real core");
SYSCTL_INT(_hw, OID_AUTO, cpu_topology_core_ids, CTLFLAG_RW,
	   &cpu_topology_core_ids, 0, "# of real cores per package");
SYSCTL_INT(_hw, OID_AUTO, cpu_topology_phys_ids, CTLFLAG_RW,
	   &cpu_topology_phys_ids, 0, "# of physical packages");
87 
88 /* Get the next valid apicid starting
89  * from current apicid (curr_apicid
90  */
91 static int
92 get_next_valid_apicid(int curr_apicid)
93 {
94 	int next_apicid = curr_apicid;
95 	do {
96 		next_apicid++;
97 	}
98 	while(get_cpuid_from_apicid(next_apicid) == -1 &&
99 	   next_apicid < NAPICID);
100 	if (next_apicid == NAPICID) {
101 		kprintf("Warning: No next valid APICID found. Returning -1\n");
102 		return -1;
103 	}
104 	return next_apicid;
105 }
106 
107 /* Generic topology tree. The parameters have the following meaning:
108  * - children_no_per_level : the number of children on each level
109  * - level_types : the type of the level (THREAD, CORE, CHIP, etc)
110  * - cur_level : the current level of the tree
111  * - node : the current node
112  * - last_free_node : the last free node in the global array.
113  * - cpuid : basicly this are the ids of the leafs
114  */
115 static void
116 build_topology_tree(int *children_no_per_level,
117    uint8_t *level_types,
118    int cur_level,
119    cpu_node_t *node,
120    cpu_node_t **last_free_node,
121    int *apicid)
122 {
123 	int i;
124 
125 	node->child_no = children_no_per_level[cur_level];
126 	node->type = level_types[cur_level];
127 	CPUMASK_ASSZERO(node->members);
128 	node->compute_unit_id = -1;
129 
130 	if (node->child_no == 0) {
131 		*apicid = get_next_valid_apicid(*apicid);
132 		CPUMASK_ASSBIT(node->members, get_cpuid_from_apicid(*apicid));
133 		return;
134 	}
135 
136 	if (node->parent_node == NULL)
137 		root_cpu_node = node;
138 
139 	for (i = 0; i < node->child_no; i++) {
140 		node->child_node[i] = *last_free_node;
141 		(*last_free_node)++;
142 
143 		node->child_node[i]->parent_node = node;
144 
145 		build_topology_tree(children_no_per_level,
146 		    level_types,
147 		    cur_level + 1,
148 		    node->child_node[i],
149 		    last_free_node,
150 		    apicid);
151 
152 		CPUMASK_ORMASK(node->members, node->child_node[i]->members);
153 	}
154 }
155 
156 #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL)
157 static void
158 migrate_elements(cpu_node_t **a, int n, int pos)
159 {
160 	int i;
161 
162 	for (i = pos; i < n - 1 ; i++) {
163 		a[i] = a[i+1];
164 	}
165 	a[i] = NULL;
166 }
167 #endif
168 
/*
 * Build the CPU topology tree in cpu_topology_nodes[].
 *
 * The topology is assumed to be uniform: the chip and core ids of every
 * cpu are compared with those of the BSP (cpu 0) to count the threads per
 * core and the cores per chip.  CPUs that match the BSP at a level are
 * siblings at that level.  The resulting shape is then replicated for all
 * assumed_ncpus cpus by build_topology_tree().
 *
 * On native x86_64 kernels, when fix_amd_topology() returns 0, CORE_LEVEL
 * leaves sharing a compute_unit_id are afterwards regrouped under a new
 * intermediate CORE_LEVEL node and are themselves retyped THREAD_LEVEL.
 *
 * - assumed_ncpus : number of cpus to place in the tree
 */
static void
build_cpu_topology(int assumed_ncpus)
{
	int i;
	int BSPID = 0;		/* reference cpu for the sibling counts */
	int threads_per_core = 0;
	int cores_per_chip = 0;
	int chips_per_package = 0;
	int children_no_per_level[LEVEL_NO];
	uint8_t level_types[LEVEL_NO];
	int apicid = -1;	/* so the first leaf starts scanning at apicid 0 */
	cpu_node_t *root = &cpu_topology_nodes[0];
	cpu_node_t *last_free_node = root + 1;

	detect_cpu_topology();

	/*
	 * Assume that the topology is uniform.
	 * Find the number of siblings within the chip
	 * and within the core to build up the topology.
	 */
	for (i = 0; i < assumed_ncpus; i++) {
		/* NOTE(review): mask is only consumed by the disabled check below */
		cpumask_t mask;

		CPUMASK_ASSBIT(mask, i);

#if 0
		/* smp_active_mask has not been initialized yet, ignore */
		if (CPUMASK_TESTMASK(mask, smp_active_mask) == 0)
			continue;
#endif

		/* At this point cores_per_chip counts every cpu on the BSP's chip */
		if (get_chip_ID(BSPID) != get_chip_ID(i))
			continue;
		++cores_per_chip;

		if (get_core_number_within_chip(BSPID) ==
		    get_core_number_within_chip(i)) {
			++threads_per_core;
		}
	}

	/* Convert the per-chip cpu count into a per-chip core count */
	cores_per_chip /= threads_per_core;
	chips_per_package = assumed_ncpus / (cores_per_chip * threads_per_core);

	kprintf("CPU Topology: cores_per_chip: %d; threads_per_core: %d; "
		"chips_per_package: %d;\n",
		cores_per_chip, threads_per_core, chips_per_package);

	if (threads_per_core > 1) { /* HT available - 4 levels */

		children_no_per_level[0] = chips_per_package;
		children_no_per_level[1] = cores_per_chip;
		children_no_per_level[2] = threads_per_core;
		children_no_per_level[3] = 0;

		level_types[0] = PACKAGE_LEVEL;
		level_types[1] = CHIP_LEVEL;
		level_types[2] = CORE_LEVEL;
		level_types[3] = THREAD_LEVEL;

		build_topology_tree(children_no_per_level,
		    level_types,
		    0,
		    root,
		    &last_free_node,
		    &apicid);

		cpu_topology_levels_number = 4;

	} else if (cores_per_chip > 1) { /* No HT available - 3 levels */

		children_no_per_level[0] = chips_per_package;
		children_no_per_level[1] = cores_per_chip;
		children_no_per_level[2] = 0;

		level_types[0] = PACKAGE_LEVEL;
		level_types[1] = CHIP_LEVEL;
		level_types[2] = CORE_LEVEL;

		build_topology_tree(children_no_per_level,
		    level_types,
		    0,
		    root,
		    &last_free_node,
		    &apicid);

		cpu_topology_levels_number = 3;

	} else { /* No HT and no Multi-Core - 2 levels */

		children_no_per_level[0] = chips_per_package;
		children_no_per_level[1] = 0;

		level_types[0] = PACKAGE_LEVEL;
		level_types[1] = CHIP_LEVEL;

		build_topology_tree(children_no_per_level,
		    level_types,
		    0,
		    root,
		    &last_free_node,
		    &apicid);

		cpu_topology_levels_number = 2;

	}

	cpu_root_node = root;


#if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL)
	if (fix_amd_topology() == 0) {
		/* NOTE(review): this i shadows the function-scope i above */
		int visited[MAXCPU], i, j, pos, cpuid;
		cpu_node_t *leaf, *parent;

		/* visited[] marks cpus already examined or merged into a group */
		bzero(visited, MAXCPU * sizeof(int));

		for (i = 0; i < assumed_ncpus; i++) {
			if (visited[i] == 0) {
				pos = 0;
				visited[i] = 1;
				leaf = get_cpu_node_by_cpuid(i);

				KASSERT(leaf != NULL, ("cpu %d NULL node", i));
				if (leaf->type == CORE_LEVEL) {
					parent = leaf->parent_node;

					/*
					 * Stage a candidate group node in the
					 * next free slot, holding only leaf so
					 * far.  It is committed further down
					 * only if a sibling joins it.
					 */
					last_free_node->child_node[0] = leaf;
					last_free_node->child_no = 1;
					last_free_node->members = leaf->members;
					last_free_node->compute_unit_id = leaf->compute_unit_id;
					last_free_node->parent_node = parent;
					last_free_node->type = CORE_LEVEL;


					for (j = 0; j < parent->child_no; j++) {
						if (parent->child_node[j] != leaf) {

							cpuid = BSFCPUMASK(parent->child_node[j]->members);
							if (visited[cpuid] == 0 &&
							    parent->child_node[j]->compute_unit_id == leaf->compute_unit_id) {

								/* Move this sibling under the candidate group */
								last_free_node->child_node[last_free_node->child_no] = parent->child_node[j];
								last_free_node->child_no++;
								CPUMASK_ORMASK(last_free_node->members, parent->child_node[j]->members);

								parent->child_node[j]->type = THREAD_LEVEL;
								parent->child_node[j]->parent_node = last_free_node;
								visited[cpuid] = 1;

								/*
								 * Remove it from the parent and
								 * re-examine index j, which now
								 * holds the following entry.
								 */
								migrate_elements(parent->child_node, parent->child_no, j);
								parent->child_no--;
								j--;
							}
						} else {
							/* Remember leaf's current slot in the parent */
							pos = j;
						}
					}
					/*
					 * Commit the group only if it gained a
					 * sibling: it takes over leaf's slot in
					 * the parent and the free slot is
					 * consumed.  Otherwise the staged slot
					 * is simply reused by a later pass.
					 */
					if (last_free_node->child_no > 1) {
						parent->child_node[pos] = last_free_node;
						leaf->type = THREAD_LEVEL;
						leaf->parent_node = last_free_node;
						last_free_node++;
					}
				}
			}
		}
	}
#endif
}
344 
345 /* Recursive function helper to print the CPU topology tree */
346 static void
347 print_cpu_topology_tree_sysctl_helper(cpu_node_t *node,
348     struct sbuf *sb,
349     char * buf,
350     int buf_len,
351     int last)
352 {
353 	int i;
354 	int bsr_member;
355 
356 	sbuf_bcat(sb, buf, buf_len);
357 	if (last) {
358 		sbuf_printf(sb, "\\-");
359 		buf[buf_len] = ' ';buf_len++;
360 		buf[buf_len] = ' ';buf_len++;
361 	} else {
362 		sbuf_printf(sb, "|-");
363 		buf[buf_len] = '|';buf_len++;
364 		buf[buf_len] = ' ';buf_len++;
365 	}
366 
367 	bsr_member = BSRCPUMASK(node->members);
368 
369 	if (node->type == PACKAGE_LEVEL) {
370 		sbuf_printf(sb,"PACKAGE MEMBERS: ");
371 	} else if (node->type == CHIP_LEVEL) {
372 		sbuf_printf(sb,"CHIP ID %d: ",
373 			get_chip_ID(bsr_member));
374 	} else if (node->type == CORE_LEVEL) {
375 		if (node->compute_unit_id != (uint8_t)-1) {
376 			sbuf_printf(sb,"Compute Unit ID %d: ",
377 				node->compute_unit_id);
378 		} else {
379 			sbuf_printf(sb,"CORE ID %d: ",
380 				get_core_number_within_chip(bsr_member));
381 		}
382 	} else if (node->type == THREAD_LEVEL) {
383 		if (node->compute_unit_id != (uint8_t)-1) {
384 			sbuf_printf(sb,"THREAD ID %d: ",
385 				get_core_number_within_chip(bsr_member));
386 		} else {
387 			sbuf_printf(sb,"THREAD ID %d: ",
388 				get_logical_CPU_number_within_core(bsr_member));
389 		}
390 	} else {
391 		sbuf_printf(sb,"UNKNOWN: ");
392 	}
393 	sbuf_print_cpuset(sb, &node->members);
394 	sbuf_printf(sb,"\n");
395 
396 	for (i = 0; i < node->child_no; i++) {
397 		print_cpu_topology_tree_sysctl_helper(node->child_node[i],
398 		    sb, buf, buf_len, i == (node->child_no -1));
399 	}
400 }
401 
402 /* SYSCTL PROCEDURE for printing the CPU Topology tree */
403 static int
404 print_cpu_topology_tree_sysctl(SYSCTL_HANDLER_ARGS)
405 {
406 	struct sbuf *sb;
407 	int ret;
408 	char buf[INDENT_BUF_SIZE];
409 
410 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
411 
412 	sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND);
413 	if (sb == NULL) {
414 		return (ENOMEM);
415 	}
416 	sbuf_printf(sb,"\n");
417 	print_cpu_topology_tree_sysctl_helper(cpu_root_node, sb, buf, 0, 1);
418 
419 	sbuf_finish(sb);
420 
421 	ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
422 
423 	sbuf_delete(sb);
424 
425 	return ret;
426 }
427 
428 /* SYSCTL PROCEDURE for printing the CPU Topology level description */
429 static int
430 print_cpu_topology_level_description_sysctl(SYSCTL_HANDLER_ARGS)
431 {
432 	struct sbuf *sb;
433 	int ret;
434 
435 	sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND);
436 	if (sb == NULL)
437 		return (ENOMEM);
438 
439 	if (cpu_topology_levels_number == 4) /* HT available */
440 		sbuf_printf(sb, "0 - thread; 1 - core; 2 - socket; 3 - anything");
441 	else if (cpu_topology_levels_number == 3) /* No HT available */
442 		sbuf_printf(sb, "0 - core; 1 - socket; 2 - anything");
443 	else if (cpu_topology_levels_number == 2) /* No HT and no Multi-Core */
444 		sbuf_printf(sb, "0 - socket; 1 - anything");
445 	else
446 		sbuf_printf(sb, "Unknown");
447 
448 	sbuf_finish(sb);
449 
450 	ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
451 
452 	sbuf_delete(sb);
453 
454 	return ret;
455 }
456 
457 /* Find a cpu_node_t by a mask */
458 static cpu_node_t *
459 get_cpu_node_by_cpumask(cpu_node_t * node,
460 			cpumask_t mask) {
461 
462 	cpu_node_t * found = NULL;
463 	int i;
464 
465 	if (CPUMASK_CMPMASKEQ(node->members, mask))
466 		return node;
467 
468 	for (i = 0; i < node->child_no; i++) {
469 		found = get_cpu_node_by_cpumask(node->child_node[i], mask);
470 		if (found != NULL) {
471 			return found;
472 		}
473 	}
474 	return NULL;
475 }
476 
477 cpu_node_t *
478 get_cpu_node_by_cpuid(int cpuid) {
479 	cpumask_t mask;
480 
481 	CPUMASK_ASSBIT(mask, cpuid);
482 
483 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
484 
485 	return get_cpu_node_by_cpumask(cpu_root_node, mask);
486 }
487 
488 /* Get the mask of siblings for level_type of a cpuid */
489 cpumask_t
490 get_cpumask_from_level(int cpuid,
491 			uint8_t level_type)
492 {
493 	cpu_node_t * node;
494 	cpumask_t mask;
495 
496 	CPUMASK_ASSBIT(mask, cpuid);
497 
498 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
499 
500 	node = get_cpu_node_by_cpumask(cpu_root_node, mask);
501 
502 	if (node == NULL) {
503 		CPUMASK_ASSZERO(mask);
504 		return mask;
505 	}
506 
507 	while (node != NULL) {
508 		if (node->type == level_type) {
509 			return node->members;
510 		}
511 		node = node->parent_node;
512 	}
513 	CPUMASK_ASSZERO(mask);
514 
515 	return mask;
516 }
517 
518 static const cpu_node_t *
519 get_cpu_node_by_chipid2(const cpu_node_t *node, int chip_id)
520 {
521 	int cpuid;
522 
523 	if (node->type != CHIP_LEVEL) {
524 		const cpu_node_t *ret = NULL;
525 		int i;
526 
527 		for (i = 0; i < node->child_no; ++i) {
528 			ret = get_cpu_node_by_chipid2(node->child_node[i],
529 			    chip_id);
530 			if (ret != NULL)
531 				break;
532 		}
533 		return ret;
534 	}
535 
536 	cpuid = BSRCPUMASK(node->members);
537 	if (get_chip_ID(cpuid) == chip_id)
538 		return node;
539 	return NULL;
540 }
541 
542 const cpu_node_t *
543 get_cpu_node_by_chipid(int chip_id)
544 {
545 	KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
546 	return get_cpu_node_by_chipid2(cpu_root_node, chip_id);
547 }
548 
549 /* init pcpu_sysctl structure info */
550 static void
551 init_pcpu_topology_sysctl(int assumed_ncpus)
552 {
553 	struct sbuf sb;
554 	cpumask_t mask;
555 	int min_id = -1;
556 	int max_id = -1;
557 	int i;
558 	int phys_id;
559 
560 	pcpu_sysctl = kmalloc(sizeof(*pcpu_sysctl) * MAXCPU, M_PCPUSYS,
561 			      M_INTWAIT | M_ZERO);
562 
563 	for (i = 0; i < assumed_ncpus; i++) {
564 		sbuf_new(&sb, pcpu_sysctl[i].cpu_name,
565 		    sizeof(pcpu_sysctl[i].cpu_name), SBUF_FIXEDLEN);
566 		sbuf_printf(&sb,"cpu%d", i);
567 		sbuf_finish(&sb);
568 
569 
570 		/* Get physical siblings */
571 		mask = get_cpumask_from_level(i, CHIP_LEVEL);
572 		if (CPUMASK_TESTZERO(mask)) {
573 			pcpu_sysctl[i].physical_id = INVALID_ID;
574 			continue;
575 		}
576 
577 		sbuf_new(&sb, pcpu_sysctl[i].physical_siblings,
578 		    sizeof(pcpu_sysctl[i].physical_siblings), SBUF_FIXEDLEN);
579 		sbuf_print_cpuset(&sb, &mask);
580 		sbuf_trim(&sb);
581 		sbuf_finish(&sb);
582 
583 		phys_id = get_chip_ID(i);
584 		pcpu_sysctl[i].physical_id = phys_id;
585 		if (min_id < 0 || min_id > phys_id)
586 			min_id = phys_id;
587 		if (max_id < 0 || max_id < phys_id)
588 			max_id = phys_id;
589 
590 		/* Get core siblings */
591 		mask = get_cpumask_from_level(i, CORE_LEVEL);
592 		if (CPUMASK_TESTZERO(mask)) {
593 			pcpu_sysctl[i].core_id = INVALID_ID;
594 			continue;
595 		}
596 
597 		sbuf_new(&sb, pcpu_sysctl[i].core_siblings,
598 		    sizeof(pcpu_sysctl[i].core_siblings), SBUF_FIXEDLEN);
599 		sbuf_print_cpuset(&sb, &mask);
600 		sbuf_trim(&sb);
601 		sbuf_finish(&sb);
602 
603 		pcpu_sysctl[i].core_id = get_core_number_within_chip(i);
604 		if (cpu_topology_core_ids < pcpu_sysctl[i].core_id + 1)
605 			cpu_topology_core_ids = pcpu_sysctl[i].core_id + 1;
606 
607 		pcpu_sysctl[i].ht_id = get_logical_CPU_number_within_core(i);
608 		if (cpu_topology_ht_ids < pcpu_sysctl[i].ht_id + 1)
609 			cpu_topology_ht_ids = pcpu_sysctl[i].ht_id + 1;
610 	}
611 
612 	/*
613 	 * Normalize physical ids so they can be used by the VM system.
614 	 * Some systems number starting at 0 others number starting at 1.
615 	 */
616 	cpu_topology_phys_ids = max_id - min_id + 1;
617 	if (cpu_topology_phys_ids <= 0)		/* don't crash */
618 		cpu_topology_phys_ids = 1;
619 	for (i = 0; i < assumed_ncpus; i++) {
620 		pcpu_sysctl[i].physical_id %= cpu_topology_phys_ids;
621 	}
622 }
623 
624 /* Build SYSCTL structure for revealing
625  * the CPU Topology to user-space.
626  */
627 static void
628 build_sysctl_cpu_topology(int assumed_ncpus)
629 {
630 	int i;
631 	struct sbuf sb;
632 
633 	/* SYSCTL new leaf for "cpu_topology" */
634 	sysctl_ctx_init(&cpu_topology_sysctl_ctx);
635 	cpu_topology_sysctl_tree = SYSCTL_ADD_NODE(&cpu_topology_sysctl_ctx,
636 	    SYSCTL_STATIC_CHILDREN(_hw),
637 	    OID_AUTO,
638 	    "cpu_topology",
639 	    CTLFLAG_RD, 0, "");
640 
641 	/* SYSCTL cpu_topology "tree" entry */
642 	SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx,
643 	    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
644 	    OID_AUTO, "tree", CTLTYPE_STRING | CTLFLAG_RD,
645 	    NULL, 0, print_cpu_topology_tree_sysctl, "A",
646 	    "Tree print of CPU topology");
647 
648 	/* SYSCTL cpu_topology "level_description" entry */
649 	SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx,
650 	    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
651 	    OID_AUTO, "level_description", CTLTYPE_STRING | CTLFLAG_RD,
652 	    NULL, 0, print_cpu_topology_level_description_sysctl, "A",
653 	    "Level description of CPU topology");
654 
655 	/* SYSCTL cpu_topology "members" entry */
656 	sbuf_new(&sb, cpu_topology_members,
657 	    sizeof(cpu_topology_members), SBUF_FIXEDLEN);
658 	sbuf_print_cpuset(&sb, &cpu_root_node->members);
659 	sbuf_trim(&sb);
660 	sbuf_finish(&sb);
661 	SYSCTL_ADD_STRING(&cpu_topology_sysctl_ctx,
662 	    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
663 	    OID_AUTO, "members", CTLFLAG_RD,
664 	    cpu_topology_members, 0,
665 	    "Members of the CPU Topology");
666 
667 	/* SYSCTL per_cpu info */
668 	for (i = 0; i < assumed_ncpus; i++) {
669 		/* New leaf : hw.cpu_topology.cpux */
670 		sysctl_ctx_init(&pcpu_sysctl[i].sysctl_ctx);
671 		pcpu_sysctl[i].sysctl_tree = SYSCTL_ADD_NODE(&pcpu_sysctl[i].sysctl_ctx,
672 		    SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
673 		    OID_AUTO,
674 		    pcpu_sysctl[i].cpu_name,
675 		    CTLFLAG_RD, 0, "");
676 
677 		/* Check if the physical_id found is valid */
678 		if (pcpu_sysctl[i].physical_id == INVALID_ID) {
679 			continue;
680 		}
681 
682 		/* Add physical id info */
683 		SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx,
684 		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
685 		    OID_AUTO, "physical_id", CTLFLAG_RD,
686 		    &pcpu_sysctl[i].physical_id, 0,
687 		    "Physical ID");
688 
689 		/* Add physical siblings */
690 		SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx,
691 		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
692 		    OID_AUTO, "physical_siblings", CTLFLAG_RD,
693 		    pcpu_sysctl[i].physical_siblings, 0,
694 		    "Physical siblings");
695 
696 		/* Check if the core_id found is valid */
697 		if (pcpu_sysctl[i].core_id == INVALID_ID) {
698 			continue;
699 		}
700 
701 		/* Add core id info */
702 		SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx,
703 		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
704 		    OID_AUTO, "core_id", CTLFLAG_RD,
705 		    &pcpu_sysctl[i].core_id, 0,
706 		    "Core ID");
707 
708 		/*Add core siblings */
709 		SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx,
710 		    SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
711 		    OID_AUTO, "core_siblings", CTLFLAG_RD,
712 		    pcpu_sysctl[i].core_siblings, 0,
713 		    "Core siblings");
714 	}
715 }
716 
717 static
718 void
719 sbuf_print_cpuset(struct sbuf *sb, cpumask_t *mask)
720 {
721 	int i;
722 	int b = -1;
723 	int e = -1;
724 	int more = 0;
725 
726 	sbuf_printf(sb, "cpus(");
727 	CPUSET_FOREACH(i, *mask) {
728 		if (b < 0) {
729 			b = i;
730 			e = b + 1;
731 			continue;
732 		}
733 		if (e == i) {
734 			++e;
735 			continue;
736 		}
737 		if (more)
738 			sbuf_printf(sb, ", ");
739 		if (b == e - 1) {
740 			sbuf_printf(sb, "%d", b);
741 		} else {
742 			sbuf_printf(sb, "%d-%d", b, e - 1);
743 		}
744 		more = 1;
745 		b = i;
746 		e = b + 1;
747 	}
748 	if (more)
749 		sbuf_printf(sb, ", ");
750 	if (b >= 0) {
751 		if (b == e - 1) {
752 			sbuf_printf(sb, "%d", b);
753 		} else {
754 			sbuf_printf(sb, "%d-%d", b, e - 1);
755 		}
756 	}
757 	sbuf_printf(sb, ") ");
758 }
759 
760 int
761 get_cpu_ht_id(int cpuid)
762 {
763 	if (pcpu_sysctl)
764 		return(pcpu_sysctl[cpuid].ht_id);
765 	return(0);
766 }
767 
768 int
769 get_cpu_core_id(int cpuid)
770 {
771 	if (pcpu_sysctl)
772 		return(pcpu_sysctl[cpuid].core_id);
773 	return(0);
774 }
775 
776 int
777 get_cpu_phys_id(int cpuid)
778 {
779 	if (pcpu_sysctl)
780 		return(pcpu_sysctl[cpuid].physical_id);
781 	return(0);
782 }
783 
784 /*
785  * Returns the highest amount of memory attached to any single node.
786  * Returns 0 if the system is not NUMA or only has one node.
787  *
788  * This function is used by the scheduler.
789  */
790 long
791 get_highest_node_memory(void)
792 {
793 	long highest = 0;
794 
795         if (cpu_root_node && cpu_root_node->type == PACKAGE_LEVEL &&
796 	    cpu_root_node->child_node[1]) {
797                 cpu_node_t *cpup;
798                 int i;
799 
800                 for (i = 0 ; i < MAXCPU && cpu_root_node->child_node[i]; ++i) {
801                         cpup = cpu_root_node->child_node[i];
802                         if (highest < cpup->phys_mem)
803                                 highest = cpup->phys_mem;
804                 }
805         }
806 	return highest;
807 }
808 
/* Defined elsewhere; presumably the number of application processors - confirm */
extern int naps;

/*
 * Build the CPU Topology and SYSCTL Topology tree.
 *
 * Runs once at boot through the SYSINIT below.  The cpu count is taken as
 * naps + 1.
 */
static void
init_cpu_topology(void)
{
	const int assumed_ncpus = naps + 1;

	build_cpu_topology(assumed_ncpus);
	init_pcpu_topology_sysctl(assumed_ncpus);
	build_sysctl_cpu_topology(assumed_ncpus);
}
SYSINIT(cpu_topology, SI_BOOT2_CPU_TOPOLOGY, SI_ORDER_FIRST,
    init_cpu_topology, NULL);
825