xref: /freebsd/sys/kern/subr_pcpu.c (revision 0957b409)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2001 Wind River Systems, Inc.
5  * All rights reserved.
6  * Written by: John Baldwin <jhb@FreeBSD.org>
7  *
8  * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the author nor the names of any co-contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /*
37  * This module provides MI support for per-cpu data.
38  *
39  * Each architecture determines the mapping of logical CPU IDs to physical
40  * CPUs.  The requirements of this mapping are as follows:
41  *  - Logical CPU IDs must reside in the range 0 ... MAXCPU - 1.
42  *  - The mapping is not required to be dense.  That is, there may be
43  *    gaps in the mappings.
44  *  - The platform sets the value of MAXCPU in <machine/param.h>.
45  *  - It is suggested, but not required, that in the non-SMP case, the
46  *    platform define MAXCPU to be 1 and define the logical ID of the
47  *    sole CPU as 0.
48  */
49 
50 #include <sys/cdefs.h>
51 __FBSDID("$FreeBSD$");
52 
53 #include "opt_ddb.h"
54 
55 #include <sys/param.h>
56 #include <sys/systm.h>
57 #include <sys/sysctl.h>
58 #include <sys/lock.h>
59 #include <sys/malloc.h>
60 #include <sys/pcpu.h>
61 #include <sys/proc.h>
62 #include <sys/smp.h>
63 #include <sys/sx.h>
64 #include <vm/uma.h>
65 #include <ddb/ddb.h>
66 
67 static MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accouting.");
68 
69 struct dpcpu_free {
70 	uintptr_t	df_start;
71 	int		df_len;
72 	TAILQ_ENTRY(dpcpu_free) df_link;
73 };
74 
75 DPCPU_DEFINE_STATIC(char, modspace[DPCPU_MODMIN] __aligned(__alignof(void *)));
76 static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head);
77 static struct sx dpcpu_lock;
78 uintptr_t dpcpu_off[MAXCPU];
79 struct pcpu *cpuid_to_pcpu[MAXCPU];
80 struct cpuhead cpuhead = STAILQ_HEAD_INITIALIZER(cpuhead);
81 
82 /*
83  * Initialize the MI portions of a struct pcpu.
84  */
85 void
86 pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
87 {
88 
89 	bzero(pcpu, size);
90 	KASSERT(cpuid >= 0 && cpuid < MAXCPU,
91 	    ("pcpu_init: invalid cpuid %d", cpuid));
92 	pcpu->pc_cpuid = cpuid;
93 	cpuid_to_pcpu[cpuid] = pcpu;
94 	STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu);
95 	cpu_pcpu_init(pcpu, cpuid, size);
96 	pcpu->pc_rm_queue.rmq_next = &pcpu->pc_rm_queue;
97 	pcpu->pc_rm_queue.rmq_prev = &pcpu->pc_rm_queue;
98 }
99 
100 void
101 dpcpu_init(void *dpcpu, int cpuid)
102 {
103 	struct pcpu *pcpu;
104 
105 	pcpu = pcpu_find(cpuid);
106 	pcpu->pc_dynamic = (uintptr_t)dpcpu - DPCPU_START;
107 
108 	/*
109 	 * Initialize defaults from our linker section.
110 	 */
111 	memcpy(dpcpu, (void *)DPCPU_START, DPCPU_BYTES);
112 
113 	/*
114 	 * Place it in the global pcpu offset array.
115 	 */
116 	dpcpu_off[cpuid] = pcpu->pc_dynamic;
117 }
118 
119 static void
120 dpcpu_startup(void *dummy __unused)
121 {
122 	struct dpcpu_free *df;
123 
124 	df = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
125 	df->df_start = (uintptr_t)&DPCPU_NAME(modspace);
126 	df->df_len = DPCPU_MODMIN;
127 	TAILQ_INSERT_HEAD(&dpcpu_head, df, df_link);
128 	sx_init(&dpcpu_lock, "dpcpu alloc lock");
129 }
130 SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, NULL);
131 
132 /*
133  * UMA_PCPU_ZONE zones, that are available for all kernel
134  * consumers. Right now 64 bit zone is used for counter(9)
135  * and pointer zone is used by flowtable.
136  */
137 
138 uma_zone_t pcpu_zone_64;
139 uma_zone_t pcpu_zone_ptr;
140 
141 static void
142 pcpu_zones_startup(void)
143 {
144 
145 	pcpu_zone_64 = uma_zcreate("64 pcpu", sizeof(uint64_t),
146 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
147 
148 	if (sizeof(uint64_t) == sizeof(void *))
149 		pcpu_zone_ptr = pcpu_zone_64;
150 	else
151 		pcpu_zone_ptr = uma_zcreate("ptr pcpu", sizeof(void *),
152 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
153 }
154 SYSINIT(pcpu_zones, SI_SUB_VM, SI_ORDER_ANY, pcpu_zones_startup, NULL);
155 
156 /*
157  * First-fit extent based allocator for allocating space in the per-cpu
158  * region reserved for modules.  This is only intended for use by the
159  * kernel linkers to place module linker sets.
160  */
161 void *
162 dpcpu_alloc(int size)
163 {
164 	struct dpcpu_free *df;
165 	void *s;
166 
167 	s = NULL;
168 	size = roundup2(size, sizeof(void *));
169 	sx_xlock(&dpcpu_lock);
170 	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
171 		if (df->df_len < size)
172 			continue;
173 		if (df->df_len == size) {
174 			s = (void *)df->df_start;
175 			TAILQ_REMOVE(&dpcpu_head, df, df_link);
176 			free(df, M_PCPU);
177 			break;
178 		}
179 		s = (void *)df->df_start;
180 		df->df_len -= size;
181 		df->df_start = df->df_start + size;
182 		break;
183 	}
184 	sx_xunlock(&dpcpu_lock);
185 
186 	return (s);
187 }
188 
189 /*
190  * Free dynamic per-cpu space at module unload time.
191  */
192 void
193 dpcpu_free(void *s, int size)
194 {
195 	struct dpcpu_free *df;
196 	struct dpcpu_free *dn;
197 	uintptr_t start;
198 	uintptr_t end;
199 
200 	size = roundup2(size, sizeof(void *));
201 	start = (uintptr_t)s;
202 	end = start + size;
203 	/*
204 	 * Free a region of space and merge it with as many neighbors as
205 	 * possible.  Keeping the list sorted simplifies this operation.
206 	 */
207 	sx_xlock(&dpcpu_lock);
208 	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
209 		if (df->df_start > end)
210 			break;
211 		/*
212 		 * If we expand at the end of an entry we may have to
213 		 * merge it with the one following it as well.
214 		 */
215 		if (df->df_start + df->df_len == start) {
216 			df->df_len += size;
217 			dn = TAILQ_NEXT(df, df_link);
218 			if (df->df_start + df->df_len == dn->df_start) {
219 				df->df_len += dn->df_len;
220 				TAILQ_REMOVE(&dpcpu_head, dn, df_link);
221 				free(dn, M_PCPU);
222 			}
223 			sx_xunlock(&dpcpu_lock);
224 			return;
225 		}
226 		if (df->df_start == end) {
227 			df->df_start = start;
228 			df->df_len += size;
229 			sx_xunlock(&dpcpu_lock);
230 			return;
231 		}
232 	}
233 	dn = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
234 	dn->df_start = start;
235 	dn->df_len = size;
236 	if (df)
237 		TAILQ_INSERT_BEFORE(df, dn, df_link);
238 	else
239 		TAILQ_INSERT_TAIL(&dpcpu_head, dn, df_link);
240 	sx_xunlock(&dpcpu_lock);
241 }
242 
243 /*
244  * Initialize the per-cpu storage from an updated linker-set region.
245  */
246 void
247 dpcpu_copy(void *s, int size)
248 {
249 #ifdef SMP
250 	uintptr_t dpcpu;
251 	int i;
252 
253 	CPU_FOREACH(i) {
254 		dpcpu = dpcpu_off[i];
255 		if (dpcpu == 0)
256 			continue;
257 		memcpy((void *)(dpcpu + (uintptr_t)s), s, size);
258 	}
259 #else
260 	memcpy((void *)(dpcpu_off[0] + (uintptr_t)s), s, size);
261 #endif
262 }
263 
264 /*
265  * Destroy a struct pcpu.
266  */
267 void
268 pcpu_destroy(struct pcpu *pcpu)
269 {
270 
271 	STAILQ_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu);
272 	cpuid_to_pcpu[pcpu->pc_cpuid] = NULL;
273 	dpcpu_off[pcpu->pc_cpuid] = 0;
274 }
275 
276 /*
277  * Locate a struct pcpu by cpu id.
278  */
279 struct pcpu *
280 pcpu_find(u_int cpuid)
281 {
282 
283 	return (cpuid_to_pcpu[cpuid]);
284 }
285 
286 int
287 sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS)
288 {
289 	uintptr_t dpcpu;
290 	int64_t count;
291 	int i;
292 
293 	count = 0;
294 	CPU_FOREACH(i) {
295 		dpcpu = dpcpu_off[i];
296 		if (dpcpu == 0)
297 			continue;
298 		count += *(int64_t *)(dpcpu + (uintptr_t)arg1);
299 	}
300 	return (SYSCTL_OUT(req, &count, sizeof(count)));
301 }
302 
303 int
304 sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS)
305 {
306 	uintptr_t dpcpu;
307 	long count;
308 	int i;
309 
310 	count = 0;
311 	CPU_FOREACH(i) {
312 		dpcpu = dpcpu_off[i];
313 		if (dpcpu == 0)
314 			continue;
315 		count += *(long *)(dpcpu + (uintptr_t)arg1);
316 	}
317 	return (SYSCTL_OUT(req, &count, sizeof(count)));
318 }
319 
320 int
321 sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS)
322 {
323 	uintptr_t dpcpu;
324 	int count;
325 	int i;
326 
327 	count = 0;
328 	CPU_FOREACH(i) {
329 		dpcpu = dpcpu_off[i];
330 		if (dpcpu == 0)
331 			continue;
332 		count += *(int *)(dpcpu + (uintptr_t)arg1);
333 	}
334 	return (SYSCTL_OUT(req, &count, sizeof(count)));
335 }
336 
337 #ifdef DDB
338 DB_SHOW_COMMAND(dpcpu_off, db_show_dpcpu_off)
339 {
340 	int id;
341 
342 	CPU_FOREACH(id) {
343 		db_printf("dpcpu_off[%2d] = 0x%jx (+ DPCPU_START = %p)\n",
344 		    id, (uintmax_t)dpcpu_off[id],
345 		    (void *)(uintptr_t)(dpcpu_off[id] + DPCPU_START));
346 	}
347 }
348 
349 static void
350 show_pcpu(struct pcpu *pc)
351 {
352 	struct thread *td;
353 
354 	db_printf("cpuid        = %d\n", pc->pc_cpuid);
355 	db_printf("dynamic pcpu = %p\n", (void *)pc->pc_dynamic);
356 	db_printf("curthread    = ");
357 	td = pc->pc_curthread;
358 	if (td != NULL)
359 		db_printf("%p: pid %d tid %d \"%s\"\n", td, td->td_proc->p_pid,
360 		    td->td_tid, td->td_name);
361 	else
362 		db_printf("none\n");
363 	db_printf("curpcb       = %p\n", pc->pc_curpcb);
364 	db_printf("fpcurthread  = ");
365 	td = pc->pc_fpcurthread;
366 	if (td != NULL)
367 		db_printf("%p: pid %d \"%s\"\n", td, td->td_proc->p_pid,
368 		    td->td_name);
369 	else
370 		db_printf("none\n");
371 	db_printf("idlethread   = ");
372 	td = pc->pc_idlethread;
373 	if (td != NULL)
374 		db_printf("%p: tid %d \"%s\"\n", td, td->td_tid, td->td_name);
375 	else
376 		db_printf("none\n");
377 	db_show_mdpcpu(pc);
378 
379 #ifdef VIMAGE
380 	db_printf("curvnet      = %p\n", pc->pc_curthread->td_vnet);
381 #endif
382 
383 #ifdef WITNESS
384 	db_printf("spin locks held:\n");
385 	witness_list_locks(&pc->pc_spinlocks, db_printf);
386 #endif
387 }
388 
389 DB_SHOW_COMMAND(pcpu, db_show_pcpu)
390 {
391 	struct pcpu *pc;
392 	int id;
393 
394 	if (have_addr)
395 		id = ((addr >> 4) % 16) * 10 + (addr % 16);
396 	else
397 		id = PCPU_GET(cpuid);
398 	pc = pcpu_find(id);
399 	if (pc == NULL) {
400 		db_printf("CPU %d not found\n", id);
401 		return;
402 	}
403 	show_pcpu(pc);
404 }
405 
406 DB_SHOW_ALL_COMMAND(pcpu, db_show_cpu_all)
407 {
408 	struct pcpu *pc;
409 	int id;
410 
411 	db_printf("Current CPU: %d\n\n", PCPU_GET(cpuid));
412 	CPU_FOREACH(id) {
413 		pc = pcpu_find(id);
414 		if (pc != NULL) {
415 			show_pcpu(pc);
416 			db_printf("\n");
417 		}
418 	}
419 }
420 DB_SHOW_ALIAS(allpcpu, db_show_cpu_all);
421 #endif
422