xref: /illumos-gate/usr/src/uts/common/os/pg.c (revision d362b749)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/systm.h>
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/thread.h>
32 #include <sys/cpuvar.h>
33 #include <sys/cpupart.h>
34 #include <sys/kmem.h>
35 #include <sys/cmn_err.h>
36 #include <sys/kstat.h>
37 #include <sys/processor.h>
38 #include <sys/disp.h>
39 #include <sys/group.h>
40 #include <sys/pg.h>
41 
42 /*
43  * Processor groups
44  *
45  * With the introduction of Chip Multi-Threaded (CMT) processor architectures,
46  * it is no longer necessarily true that a given physical processor module
47  * will present itself as a single schedulable entity (cpu_t). Rather, each
48  * chip and/or processor core may present itself as one or more "logical" CPUs.
49  *
50  * The logical CPUs presented may share physical components such as caches,
51  * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the
52  * kernel be aware of the relationships existing between logical CPUs so that
53  * the appropriate optimizations may be employed.
54  *
55  * The processor group abstraction represents a set of logical CPUs that
56  * generally share some sort of physical or characteristic relationship.
57  *
58  * In the case of a physical sharing relationship, the CPUs in the group may
59  * share a pipeline, cache or floating point unit. In the case of a logical
60  * relationship, a PG may represent the set of CPUs in a processor set, or the
61  * set of CPUs running at a particular clock speed.
62  *
63  * The generic processor group structure, pg_t, contains the elements generic
64  * to a group of CPUs. Depending on the nature of the CPU relationship
65  * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that
66  * PG where more specific data is represented.
67  *
68  * As an example, a PG representing a PHYSICAL relationship, may be recast to
69  * a pghw_t, where data further describing the hardware sharing relationship
70  * is maintained. See pghw.c and pghw.h for details on physical PGs.
71  *
72  * At this time a more specialized casting of a PG representing a LOGICAL
73  * relationship has not been implemented, but the architecture allows for this
74  * in the future.
75  *
76  * Processor Group Classes
77  *
78  * Processor group consumers may wish to maintain and associate specific
79  * data with the PGs they create. For this reason, a mechanism for creating
80  * class specific PGs exists. Classes may overload the default functions for
81  * creating, destroying, and associating CPUs with PGs, and may also register
82  * class specific callbacks to be invoked when the CPU related system
83  * configuration changes. Class specific data is stored/associated with
84  * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first
85  * element of a class specific PG object. In memory, such a structure may look
86  * like:
87  *
88  * ----------------------- - - -
89  * | common              | | | |  <--(pg_t *)
90  * ----------------------- | | -
91  * | HW specific         | | | <-----(pghw_t *)
92  * ----------------------- | -
93  * | class specific      | | <-------(pg_cmt_t *)
94  * ----------------------- -
95  *
96  * Access to the PG class specific data can be had by casting a pointer to
97  * its class specific view.
98  */
99 
100 static pg_t		*pg_alloc_default(pg_class_t);
101 static void		pg_free_default(pg_t *);
102 
103 /*
104  * Bootstrap CPU specific PG data
105  * See pg_cpu_bootstrap()
106  */
107 static cpu_pg_t		bootstrap_pg_data;
108 
109 /*
110  * Bitset of allocated PG ids (they are sequential)
111  * and the next free id in the set.
112  */
113 static bitset_t		pg_id_set;
114 static pgid_t		pg_id_next = 0;
115 
116 /*
117  * Default and externed PG ops vectors
118  */
119 static struct pg_ops pg_ops_default = {
120 	pg_alloc_default,	/* alloc */
121 	pg_free_default,	/* free */
122 	NULL,			/* cpu_init */
123 	NULL,			/* cpu_fini */
124 	NULL,			/* cpu_active */
125 	NULL,			/* cpu_inactive */
126 	NULL,			/* cpupart_in */
127 	NULL,			/* cpupart_out */
128 	NULL,			/* cpupart_move */
129 	NULL,			/* cpu_belongs */
130 };
131 
/*
 * Class specific PG allocation.
 *
 * Calls the alloc op of class id "class" if that class supplies one,
 * and the default class's alloc op otherwise.
 */
#define	PG_ALLOC(class)							\
	((pg_classes[class].pgc_ops->alloc != NULL) ?			\
	    pg_classes[class].pgc_ops->alloc() :			\
	    pg_classes[pg_default_cid].pgc_ops->alloc())
139 
/*
 * Class specific PG de-allocation.
 *
 * Calls the free op of the PG's own class if that class supplies one,
 * and the default class's free op otherwise.
 *
 * Note: the final line of the definition deliberately carries no line
 * continuation, so the macro ends here regardless of what follows it.
 */
#define	PG_FREE(pg)							\
	((pg)->pg_class->pgc_ops->free ?				\
	    (pg)->pg_class->pgc_ops->free(pg) :				\
	    pg_classes[pg_default_cid].pgc_ops->free(pg))
144 
145 
/*
 * Class specific membership test callback.
 *
 * Evaluates to the result of the PG class's cpu_belongs op for (pg, cp),
 * or to 0 when the class does not supply that op.
 *
 * Note: the final line of the definition deliberately carries no line
 * continuation, so the macro ends here regardless of what follows it.
 */
#define	PG_CPU_BELONGS(pg, cp)						\
	((pg)->pg_class->pgc_ops->cpu_belongs ?				\
	    (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0)
152 
/*
 * CPU configuration callbacks
 *
 * PG_CPU_INIT: invoke the cpu_init op of class id "class" on "cp", if
 * the class supplies one.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and may be followed by a semicolon anywhere a
 * statement is legal (including an if branch that has an else).
 */
#define	PG_CPU_INIT(class, cp)						\
do {									\
	if (pg_classes[class].pgc_ops->cpu_init)			\
		pg_classes[class].pgc_ops->cpu_init(cp);		\
	/*CONSTCOND*/							\
} while (0)
161 
/*
 * PG_CPU_FINI: invoke the cpu_fini op of class id "class" on "cp", if
 * the class supplies one. Expands to a single statement.
 */
#define	PG_CPU_FINI(class, cp)						\
do {									\
	if (pg_classes[class].pgc_ops->cpu_fini)			\
		pg_classes[class].pgc_ops->cpu_fini(cp);		\
	/*CONSTCOND*/							\
} while (0)
167 
/*
 * PG_CPU_ACTIVE: invoke the cpu_active op of class id "class" on "cp",
 * if the class supplies one. Expands to a single statement.
 */
#define	PG_CPU_ACTIVE(class, cp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpu_active)			\
		pg_classes[class].pgc_ops->cpu_active(cp);		\
	/*CONSTCOND*/							\
} while (0)
173 
/*
 * PG_CPU_INACTIVE: invoke the cpu_inactive op of class id "class" on
 * "cp", if the class supplies one. Expands to a single statement.
 */
#define	PG_CPU_INACTIVE(class, cp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpu_inactive)			\
		pg_classes[class].pgc_ops->cpu_inactive(cp);		\
	/*CONSTCOND*/							\
} while (0)
179 
/*
 * CPU / cpupart configuration callbacks
 *
 * PG_CPUPART_IN: invoke the cpupart_in op of class id "class" with
 * (cp, pp), if the class supplies one.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and may be followed by a semicolon anywhere a
 * statement is legal (including an if branch that has an else).
 */
#define	PG_CPUPART_IN(class, cp, pp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpupart_in)			\
		pg_classes[class].pgc_ops->cpupart_in(cp, pp);		\
	/*CONSTCOND*/							\
} while (0)
188 
/*
 * PG_CPUPART_OUT: invoke the cpupart_out op of class id "class" with
 * (cp, pp), if the class supplies one. Expands to a single statement.
 */
#define	PG_CPUPART_OUT(class, cp, pp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpupart_out)			\
		pg_classes[class].pgc_ops->cpupart_out(cp, pp);		\
	/*CONSTCOND*/							\
} while (0)
194 
/*
 * PG_CPUPART_MOVE: invoke the cpupart_move op of class id "class" with
 * (cp, old, new), if the class supplies one. Expands to a single
 * statement.
 */
#define	PG_CPUPART_MOVE(class, cp, old, new)				\
do {									\
	if (pg_classes[class].pgc_ops->cpupart_move)			\
		pg_classes[class].pgc_ops->cpupart_move(cp, old, new);	\
	/*CONSTCOND*/							\
} while (0)
200 
201 
202 
203 static pg_class_t	*pg_classes;
204 static int		pg_nclasses;
205 
206 static pg_cid_t		pg_default_cid;
207 
208 /*
209  * Initialze common PG subsystem. Perform CPU 0 initialization
210  */
211 void
212 pg_init(void)
213 {
214 	pg_default_cid =
215 	    pg_class_register("default", &pg_ops_default, PGR_LOGICAL);
216 }
217 
218 /*
219  * Perform CPU 0 initialization
220  */
221 void
222 pg_cpu0_init(void)
223 {
224 	extern void pghw_physid_create();
225 
226 	/*
227 	 * Create the physical ID cache for the boot CPU
228 	 */
229 	pghw_physid_create(CPU);
230 
231 	/*
232 	 * pg_cpu_* require that cpu_lock be held
233 	 */
234 	mutex_enter(&cpu_lock);
235 
236 	pg_cpu_init(CPU);
237 	pg_cpupart_in(CPU, &cp_default);
238 	pg_cpu_active(CPU);
239 
240 	mutex_exit(&cpu_lock);
241 }
242 
243 /*
244  * Invoked when topology for CPU0 changes
245  * post pg_cpu0_init().
246  *
247  * Currently happens as a result of null_proc_lpa
248  * on Starcat.
249  */
250 void
251 pg_cpu0_reinit(void)
252 {
253 	mutex_enter(&cpu_lock);
254 	pg_cpu_inactive(CPU);
255 	pg_cpupart_out(CPU, &cp_default);
256 	pg_cpu_fini(CPU);
257 
258 	pg_cpu_init(CPU);
259 	pg_cpupart_in(CPU, &cp_default);
260 	pg_cpu_active(CPU);
261 	mutex_exit(&cpu_lock);
262 }
263 
264 /*
265  * Register a new PG class
266  */
267 pg_cid_t
268 pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation)
269 {
270 	pg_class_t	*newclass;
271 	pg_class_t	*classes_old;
272 	id_t		cid;
273 
274 	mutex_enter(&cpu_lock);
275 
276 	/*
277 	 * Allocate a new pg_class_t in the pg_classes array
278 	 */
279 	if (pg_nclasses == 0) {
280 		pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP);
281 	} else {
282 		classes_old = pg_classes;
283 		pg_classes =
284 		    kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1),
285 			KM_SLEEP);
286 		(void) kcopy(classes_old, pg_classes,
287 		    sizeof (pg_class_t) * pg_nclasses);
288 		kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses);
289 	}
290 
291 	cid = pg_nclasses++;
292 	newclass = &pg_classes[cid];
293 
294 	(void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX);
295 	newclass->pgc_id = cid;
296 	newclass->pgc_ops = ops;
297 	newclass->pgc_relation = relation;
298 
299 	mutex_exit(&cpu_lock);
300 
301 	return (cid);
302 }
303 
304 /*
305  * Try to find an existing pg in set in which to place cp.
306  * Returns the pg if found, and NULL otherwise.
307  * In the event that the CPU could belong to multiple
308  * PGs in the set, the first matching PG will be returned.
309  */
310 pg_t *
311 pg_cpu_find_pg(cpu_t *cp, group_t *set)
312 {
313 	pg_t		*pg;
314 	group_iter_t	i;
315 
316 	group_iter_init(&i);
317 	while ((pg = group_iterate(set, &i)) != NULL) {
318 		/*
319 		 * Ask the class if the CPU belongs here
320 		 */
321 		if (PG_CPU_BELONGS(pg, cp))
322 			return (pg);
323 	}
324 	return (NULL);
325 }
326 
327 /*
328  * Iterate over the CPUs in a PG after initializing
329  * the iterator with PG_CPU_ITR_INIT()
330  */
331 cpu_t *
332 pg_cpu_next(pg_cpu_itr_t *itr)
333 {
334 	cpu_t		*cpu;
335 	pg_t		*pg = itr->pg;
336 
337 	cpu = group_iterate(&pg->pg_cpus, &itr->position);
338 	return (cpu);
339 }
340 
341 /*
342  * Create a PG of a given class.
343  * This routine may block.
344  */
345 pg_t *
346 pg_create(pg_cid_t cid)
347 {
348 	pg_t	*pg;
349 	pgid_t	id;
350 
351 	ASSERT(MUTEX_HELD(&cpu_lock));
352 
353 	/*
354 	 * Call the class specific PG allocation routine
355 	 */
356 	pg = PG_ALLOC(cid);
357 	pg->pg_class = &pg_classes[cid];
358 	pg->pg_relation = pg->pg_class->pgc_relation;
359 
360 	/*
361 	 * Find the next free sequential pg id
362 	 */
363 	do {
364 		if (pg_id_next >= bitset_capacity(&pg_id_set))
365 			bitset_resize(&pg_id_set, pg_id_next + 1);
366 		id = pg_id_next++;
367 	} while (bitset_in_set(&pg_id_set, id));
368 
369 	pg->pg_id = id;
370 	bitset_add(&pg_id_set, pg->pg_id);
371 
372 	/*
373 	 * Create the PG's CPU group
374 	 */
375 	group_create(&pg->pg_cpus);
376 
377 	return (pg);
378 }
379 
380 /*
381  * Destroy a PG.
382  * This routine may block.
383  */
384 void
385 pg_destroy(pg_t *pg)
386 {
387 	ASSERT(MUTEX_HELD(&cpu_lock));
388 
389 	group_destroy(&pg->pg_cpus);
390 
391 	/*
392 	 * Unassign the pg_id
393 	 */
394 	if (pg_id_next > pg->pg_id)
395 		pg_id_next = pg->pg_id;
396 	bitset_del(&pg_id_set, pg->pg_id);
397 
398 	/*
399 	 * Invoke the class specific de-allocation routine
400 	 */
401 	PG_FREE(pg);
402 }
403 
404 /*
405  * Add the CPU "cp" to processor group "pg"
406  * This routine may block.
407  */
408 void
409 pg_cpu_add(pg_t *pg, cpu_t *cp)
410 {
411 	int	err;
412 
413 	ASSERT(MUTEX_HELD(&cpu_lock));
414 
415 	/* This adds the CPU to the PG's CPU group */
416 	err = group_add(&pg->pg_cpus, cp, GRP_RESIZE);
417 	ASSERT(err == 0);
418 
419 	/* This adds the PG to the CPUs PG group */
420 	ASSERT(cp->cpu_pg != &bootstrap_pg_data);
421 	err = group_add(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
422 	ASSERT(err == 0);
423 }
424 
425 /*
426  * Remove "cp" from "pg".
427  * This routine may block.
428  */
429 void
430 pg_cpu_delete(pg_t *pg, cpu_t *cp)
431 {
432 	int	err;
433 
434 	ASSERT(MUTEX_HELD(&cpu_lock));
435 
436 	/* Remove the CPU from the PG */
437 	err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE);
438 	ASSERT(err == 0);
439 
440 	/* Remove the PG from the CPU's PG group */
441 	ASSERT(cp->cpu_pg != &bootstrap_pg_data);
442 	err = group_remove(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
443 	ASSERT(err == 0);
444 }
445 
446 /*
447  * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg
448  */
449 static cpu_pg_t *
450 pg_cpu_data_alloc(void)
451 {
452 	cpu_pg_t	*pgd;
453 
454 	pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP);
455 	group_create(&pgd->pgs);
456 	group_create(&pgd->cmt_pgs);
457 
458 	return (pgd);
459 }
460 
461 /*
462  * Free the CPU's PG data.
463  */
464 static void
465 pg_cpu_data_free(cpu_pg_t *pgd)
466 {
467 	group_destroy(&pgd->pgs);
468 	group_destroy(&pgd->cmt_pgs);
469 	kmem_free(pgd, sizeof (cpu_pg_t));
470 }
471 
472 /*
473  * A new CPU is coming into the system, either via booting or DR.
474  * Allocate it's PG data, and notify all registered classes about
475  * the new CPU.
476  *
477  * This routine may block.
478  */
479 void
480 pg_cpu_init(cpu_t *cp)
481 {
482 	pg_cid_t	i;
483 
484 	ASSERT(MUTEX_HELD(&cpu_lock));
485 
486 	/*
487 	 * Allocate and size the per CPU pg data
488 	 */
489 	cp->cpu_pg = pg_cpu_data_alloc();
490 
491 	/*
492 	 * Notify all registered classes about the new CPU
493 	 */
494 	for (i = 0; i < pg_nclasses; i++)
495 		PG_CPU_INIT(i, cp);
496 }
497 
498 /*
499  * This CPU is being deleted from the system. Notify the classes
500  * and free up the CPU's PG data.
501  */
502 void
503 pg_cpu_fini(cpu_t *cp)
504 {
505 	pg_cid_t	i;
506 
507 	ASSERT(MUTEX_HELD(&cpu_lock));
508 
509 	/*
510 	 * This can happen if the CPU coming into the system
511 	 * failed to power on.
512 	 */
513 	if (cp->cpu_pg == NULL ||
514 	    cp->cpu_pg == &bootstrap_pg_data)
515 		return;
516 
517 	for (i = 0; i < pg_nclasses; i++)
518 		PG_CPU_FINI(i, cp);
519 
520 	pg_cpu_data_free(cp->cpu_pg);
521 	cp->cpu_pg = NULL;
522 }
523 
524 /*
525  * This CPU is becoming active (online)
526  * This routine may not block as it is called from paused CPUs
527  * context.
528  */
529 void
530 pg_cpu_active(cpu_t *cp)
531 {
532 	pg_cid_t	i;
533 
534 	ASSERT(MUTEX_HELD(&cpu_lock));
535 
536 	/*
537 	 * Notify all registered classes about the new CPU
538 	 */
539 	for (i = 0; i < pg_nclasses; i++)
540 		PG_CPU_ACTIVE(i, cp);
541 }
542 
543 /*
544  * This CPU is going inactive (offline)
545  * This routine may not block, as it is called from paused
546  * CPUs context.
547  */
548 void
549 pg_cpu_inactive(cpu_t *cp)
550 {
551 	pg_cid_t	i;
552 
553 	ASSERT(MUTEX_HELD(&cpu_lock));
554 
555 	/*
556 	 * Notify all registered classes about the new CPU
557 	 */
558 	for (i = 0; i < pg_nclasses; i++)
559 		PG_CPU_INACTIVE(i, cp);
560 }
561 
562 /*
563  * Invoked when the CPU is about to move into the partition
564  * This routine may block.
565  */
566 void
567 pg_cpupart_in(cpu_t *cp, cpupart_t *pp)
568 {
569 	int	i;
570 
571 	ASSERT(MUTEX_HELD(&cpu_lock));
572 
573 	/*
574 	 * Notify all registered classes that the
575 	 * CPU is about to enter the CPU partition
576 	 */
577 	for (i = 0; i < pg_nclasses; i++)
578 		PG_CPUPART_IN(i, cp, pp);
579 }
580 
581 /*
582  * Invoked when the CPU is about to move out of the partition
583  * This routine may block.
584  */
585 /*ARGSUSED*/
586 void
587 pg_cpupart_out(cpu_t *cp, cpupart_t *pp)
588 {
589 	int	i;
590 
591 	ASSERT(MUTEX_HELD(&cpu_lock));
592 
593 	/*
594 	 * Notify all registered classes that the
595 	 * CPU is about to leave the CPU partition
596 	 */
597 	for (i = 0; i < pg_nclasses; i++)
598 		PG_CPUPART_OUT(i, cp, pp);
599 }
600 
601 /*
602  * Invoked when the CPU is *moving* partitions.
603  *
604  * This routine may not block, as it is called from paused CPUs
605  * context.
606  */
607 void
608 pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
609 {
610 	int	i;
611 
612 	ASSERT(MUTEX_HELD(&cpu_lock));
613 
614 	/*
615 	 * Notify all registered classes that the
616 	 * CPU is about to leave the CPU partition
617 	 */
618 	for (i = 0; i < pg_nclasses; i++)
619 		PG_CPUPART_MOVE(i, cp, oldpp, newpp);
620 }
621 
622 /*
623  * Provide the specified CPU a bootstrap pg
624  * This is needed to allow sane behaviour if any PG consuming
625  * code needs to deal with a partially initialized CPU
626  */
627 void
628 pg_cpu_bootstrap(cpu_t *cp)
629 {
630 	cp->cpu_pg = &bootstrap_pg_data;
631 }
632 
633 /*ARGSUSED*/
634 static pg_t *
635 pg_alloc_default(pg_class_t class)
636 {
637 	return (kmem_zalloc(sizeof (pg_t), KM_SLEEP));
638 }
639 
640 /*ARGSUSED*/
641 static void
642 pg_free_default(struct pg *pg)
643 {
644 	kmem_free(pg, sizeof (pg_t));
645 }
646