/*-
 * SPDX-License-Identifier: Beerware
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * $FreeBSD$
 */

#ifndef _SYS_SMP_H_
#define _SYS_SMP_H_

#ifdef _KERNEL

#ifndef LOCORE

#include <sys/cpuset.h>
#include <sys/queue.h>

/*
 * Types of nodes in the topological tree.
 */
typedef enum {
	/* No node has this type; can be used in topo API calls. */
	TOPO_TYPE_DUMMY,
	/* Processing unit aka computing unit aka logical CPU. */
	TOPO_TYPE_PU,
	/* Physical subdivision of a package. */
	TOPO_TYPE_CORE,
	/* CPU L1/L2/L3 cache. */
	TOPO_TYPE_CACHE,
	/* Package aka chip, equivalent to socket. */
	TOPO_TYPE_PKG,
	/* NUMA node. */
	TOPO_TYPE_NODE,
	/* Other logical or physical grouping of PUs. */
	/* E.g. PUs on the same die, or PUs sharing an FPU. */
	TOPO_TYPE_GROUP,
	/* The whole system. */
	TOPO_TYPE_SYSTEM
} topo_node_type;

/* Hardware identifier of a topology component. */
typedef	unsigned int hwid_t;
/* Logical CPU identifier. */
typedef	int cpuid_t;

/* A node in the topology. */
struct topo_node {
	struct topo_node			*parent;
	TAILQ_HEAD(topo_children, topo_node)	children;	/* List head. */
	TAILQ_ENTRY(topo_node)			siblings;	/* List linkage. */
	cpuset_t				cpuset;
	topo_node_type				type;
	uintptr_t				subtype;
	hwid_t					hwid;	/* Hardware identifier. */
	cpuid_t					id;	/* Logical CPU identifier. */
	int					nchildren;
	int					cpu_count;
};

/*
 * Scheduling topology of a NUMA or SMP system.
 *
 * The top level topology is an array of pointers to groups.  Each group
 * contains a bitmask of cpus in its group or subgroups.  It may also
 * contain a pointer to an array of child groups.
 *
 * The bitmasks at non leaf groups may be used by consumers who support
 * a smaller depth than the hardware provides.
 *
 * The topology may be omitted by systems where all CPUs are equal.
 */

struct cpu_group {
	struct cpu_group *cg_parent;	/* Our parent group. */
	struct cpu_group *cg_child;	/* Optional children groups. */
	cpuset_t	cg_mask;	/* Mask of cpus in this group. */
	int32_t		cg_count;	/* Count of cpus in this group. */
	int32_t		cg_first;	/* First cpu in this group. */
	int32_t		cg_last;	/* Last cpu in this group. */
	int16_t		cg_children;	/* Number of children groups. */
	int8_t		cg_level;	/* Shared cache level. */
	int8_t		cg_flags;	/* Traversal modifiers. */
};

typedef struct cpu_group *cpu_group_t;

/*
 * Defines common resources for CPUs in the group.  The highest level
 * resource should be used when multiple are shared.
 */
#define	CG_SHARE_NONE	0
#define	CG_SHARE_L1	1
#define	CG_SHARE_L2	2
#define	CG_SHARE_L3	3

#define	MAX_CACHE_LEVELS	CG_SHARE_L3

/*
 * Behavior modifiers for load balancing and affinity.
 */
#define	CG_FLAG_HTT	0x01		/* Schedule the alternate core last. */
#define	CG_FLAG_SMT	0x02		/* New age htt, less crippled. */
#define	CG_FLAG_THREAD	(CG_FLAG_HTT | CG_FLAG_SMT)	/* Any threading. */

/*
 * Convenience routines for building and traversing topologies.
 *
 * NOTE(review): the *_by_hwid() routines take the hardware id as a plain
 * int, while struct topo_node stores it as hwid_t (unsigned int).  The
 * prototypes are left as they are because they must match definitions
 * that live outside this header; confirm before unifying the types.
 */
#ifdef SMP
void topo_init_node(struct topo_node *node);
void topo_init_root(struct topo_node *root);
struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
void topo_promote_child(struct topo_node *child);
struct topo_node * topo_next_node(struct topo_node *top,
    struct topo_node *node);
struct topo_node * topo_next_nonchild_node(struct topo_node *top,
    struct topo_node *node);
void topo_set_pu_id(struct topo_node *node, cpuid_t id);

enum topo_level {
	TOPO_LEVEL_PKG = 0,
	/*
	 * Some systems have useful sub-package core organizations.  On these,
	 * a package has one or more subgroups.  Each subgroup contains one or
	 * more cache groups (cores that share a last level cache).
	 */
	TOPO_LEVEL_GROUP,
	TOPO_LEVEL_CACHEGROUP,
	TOPO_LEVEL_CORE,
	TOPO_LEVEL_THREAD,
	TOPO_LEVEL_COUNT	/* Must be last */
};
/* Result of topo_analyze(): one counter slot per topo_level. */
struct topo_analysis {
	int entities[TOPO_LEVEL_COUNT];
};
int topo_analyze(struct topo_node *topo_root, int all,
    struct topo_analysis *results);

/*
 * Walk the nodes reachable from 'root', starting with 'root' itself and
 * advancing with topo_next_node() until it returns NULL.
 */
#define	TOPO_FOREACH(i, root)	\
	for (i = root; i != NULL; i = topo_next_node(root, i))

struct cpu_group *smp_topo(void);
struct cpu_group *smp_topo_alloc(u_int count);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
    int l1count, int l1flags);
struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);

extern void (*cpustop_restartfunc)(void);
/* The suspend/resume cpusets are x86 only, but minimize ifdefs. */
extern volatile cpuset_t resuming_cpus;	/* woken up cpus in suspend pen */
extern volatile cpuset_t started_cpus;	/* cpus to let out of stop pen */
extern volatile cpuset_t stopped_cpus;	/* cpus in stop pen */
extern volatile cpuset_t suspended_cpus; /* cpus [near] sleeping in susp pen */
extern volatile cpuset_t toresume_cpus;	/* cpus to let out of suspend pen */
extern cpuset_t hlt_cpus_mask;		/* XXX 'mask' is detail in old impl */
extern cpuset_t logical_cpus_mask;
#endif /* SMP */

extern u_int mp_maxid;
extern int mp_maxcpus;
extern int mp_ncores;
extern int mp_ncpus;
extern int smp_cpus;
extern volatile int smp_started;
extern int smp_threads_per_core;

extern cpuset_t all_cpus;
extern cpuset_t cpuset_domain[MAXMEMDOM];	/* CPUs in each NUMA domain. */

/*
 * Macro allowing us to determine whether a CPU is absent at any given
 * time, thus permitting us to configure sparse maps of cpuid-dependent
 * (per-CPU) structures.
 */
#define	CPU_ABSENT(x_cpu)	(!CPU_ISSET(x_cpu, &all_cpus))

/*
 * Macros to iterate over non-absent CPUs.  CPU_FOREACH() takes an
 * integer iterator and iterates over the available set of CPUs.
 * CPU_FIRST() returns the id of the first non-absent CPU.  CPU_NEXT()
 * returns the id of the next non-absent CPU.  It will wrap back to
 * CPU_FIRST() once the end of the list is reached.  The iterators are
 * currently implemented via inline functions.
 *
 * CPU_FOREACH() ends in an "else" rather than a bare "if" so that an
 * "else" written by the caller after the loop statement cannot bind to
 * the test hidden inside the macro.
 */
#define	CPU_FOREACH(i)							\
	for ((i) = 0; (i) <= mp_maxid; (i)++)				\
		if (CPU_ABSENT((i))) {} else

/*
 * Return the lowest CPU id that is set in all_cpus.
 *
 * The scan has no upper bound: it only terminates when it finds a
 * present CPU, so all_cpus must contain at least one CPU when this is
 * called.
 */
static __inline int
cpu_first(void)
{
	int i;

	for (i = 0;; i++)
		if (!CPU_ABSENT(i))
			return (i);
}

/*
 * Return the id of the next present CPU after 'i', wrapping around to
 * id 0 once mp_maxid has been passed.  If 'i' is the only present CPU
 * the scan comes back around and returns 'i' itself.
 *
 * Like cpu_first(), the loop only ends when a present CPU is found, so
 * all_cpus must not be empty.
 */
static __inline int
cpu_next(int i)
{

	for (;;) {
		i++;
		/*
		 * mp_maxid is unsigned, so the comparison is done in
		 * unsigned arithmetic; the cast only makes that explicit.
		 */
		if ((u_int)i > mp_maxid)
			i = 0;
		if (!CPU_ABSENT(i))
			return (i);
	}
}

#define	CPU_FIRST()	cpu_first()
#define	CPU_NEXT(i)	cpu_next((i))

#ifdef SMP
/*
 * Machine dependent functions used to initialize MP support.
 *
 * The cpu_mp_probe() should check to see if MP support is present and return
 * zero if it is not or non-zero if it is.  If MP support is present, then
 * cpu_mp_start() will be called so that MP can be enabled.  This function
 * should do things such as startup secondary processors.  It should also
 * setup mp_ncpus, all_cpus, and smp_cpus.  It should also ensure that
 * smp_started is initialized at the appropriate time.
 * Once cpu_mp_start() returns, machine independent MP startup code will be
 * executed and a simple message will be output to the console.  Finally,
 * cpu_mp_announce() will be called so that machine dependent messages about
 * the MP support may be output to the console if desired.
 *
 * The cpu_setmaxid() function is called very early during the boot process
 * so that the MD code may set mp_maxid to provide an upper bound on CPU IDs
 * that other subsystems may use.  If a platform is not able to determine
 * the exact maximum ID that early, then it may set mp_maxid to MAXCPU - 1.
 */
struct thread;

struct cpu_group *cpu_topo(void);
void	cpu_mp_announce(void);
int	cpu_mp_probe(void);
void	cpu_mp_setmaxid(void);
void	cpu_mp_start(void);

void	forward_signal(struct thread *);
int	restart_cpus(cpuset_t);
int	stop_cpus(cpuset_t);
int	stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int	suspend_cpus(cpuset_t);
int	resume_cpus(cpuset_t);
#endif

void	smp_rendezvous_action(void);
extern	struct mtx smp_ipi_mtx;

#endif /* SMP */

int	quiesce_all_cpus(const char *, int);
int	quiesce_cpus(cpuset_t, const char *, int);
void	quiesce_all_critical(void);
void	cpus_fence_seq_cst(void);
void	smp_no_rendezvous_barrier(void *);
void	smp_rendezvous(void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);
void	smp_rendezvous_cpus(cpuset_t,
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);

struct smp_rendezvous_cpus_retry_arg {
	cpuset_t cpus;
};
void	smp_rendezvous_cpus_retry(cpuset_t,
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *, int),
		       struct smp_rendezvous_cpus_retry_arg *);

void	smp_rendezvous_cpus_done(struct smp_rendezvous_cpus_retry_arg *);

#endif /* !LOCORE */
#endif /* _KERNEL */
#endif /* _SYS_SMP_H_ */