/*	$OpenBSD: subr_prof.c,v 1.41 2024/01/24 19:23:38 cheloha Exp $	*/
/*	$NetBSD: subr_prof.c,v 1.12 1996/04/22 01:38:50 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)subr_prof.c	8.3 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/clockintr.h>
#include <sys/pledge.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/user.h>

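/* Nanoseconds between profiling ticks; initialized by the clockintr code. */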
uint64_t profclock_period;

#if defined(GPROF) || defined(DDBPROF)
#include <sys/malloc.h>
#include <sys/gmon.h>

#include <uvm/uvm_extern.h>

#include <machine/db_machdep.h>
#include <ddb/db_extern.h>

/*
 * Flag to prevent CPUs from executing the mcount() monitor function
 * until we're sure they are in a sane state.
 */
int gmoninit = 0;
u_int gmon_cpu_count;		/* [K] number of CPUs with profiling enabled */

extern char etext[];

void gmonclock(struct clockrequest *, void *, void *);

void
prof_init(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *p;
	u_long lowpc, highpc, textsize;
	u_long kcountsize, fromssize, tossize;
	long tolimit;
	char *cp;
	int size;

	/*
	 * Round lowpc and highpc to multiples of the density we're using
	 * so the rest of the scaling (here and in gprof) stays in ints.
	 */
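	/*
	 * For example, with the common HISTFRACTION of 2 and a 2-byte
	 * HISTCOUNTER (typical values from <sys/gmon.h>; archs may
	 * differ), both bounds land on multiples of 4 bytes.
	 */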
	lowpc = ROUNDDOWN(KERNBASE, HISTFRACTION * sizeof(HISTCOUNTER));
	highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
	textsize = highpc - lowpc;
#ifdef GPROF
	printf("Profiling kernel, textsize=%ld [%lx..%lx]\n",
	    textsize, lowpc, highpc);
#endif
	kcountsize = textsize / HISTFRACTION;
	fromssize = textsize / HASHFRACTION;
	tolimit = textsize * ARCDENSITY / 100;
	if (tolimit < MINARCS)
		tolimit = MINARCS;
	else if (tolimit > MAXARCS)
		tolimit = MAXARCS;
	tossize = tolimit * sizeof(struct tostruct);
	size = sizeof(*p) + kcountsize + fromssize + tossize;

	/* Allocate and initialize one profiling buffer per CPU. */
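	/*
	 * Each buffer is parceled out in the order the pointers are set
	 * below: the gmonparam header, then the tostruct arcs, the
	 * kcount histogram, and finally the froms hash table.
	 */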
	CPU_INFO_FOREACH(cii, ci) {
		cp = km_alloc(round_page(size), &kv_any, &kp_zero, &kd_nowait);
		if (cp == NULL) {
			printf("No memory for profiling.\n");
			return;
		}

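		/*
		 * Give every CPU its own gmon clock, staggered so the
		 * interrupts don't all fire at the same offset.
		 */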
		clockintr_bind(&ci->ci_gmonclock, ci, gmonclock, NULL);
		clockintr_stagger(&ci->ci_gmonclock, profclock_period,
		    CPU_INFO_UNIT(ci), MAXCPUS);

		p = (struct gmonparam *)cp;
		cp += sizeof(*p);
		p->tos = (struct tostruct *)cp;
		cp += tossize;
		p->kcount = (u_short *)cp;
		cp += kcountsize;
		p->froms = (u_short *)cp;

		p->state = GMON_PROF_OFF;
		p->lowpc = lowpc;
		p->highpc = highpc;
		p->textsize = textsize;
		p->hashfraction = HASHFRACTION;
		p->kcountsize = kcountsize;
		p->fromssize = fromssize;
		p->tolimit = tolimit;
		p->tossize = tossize;

		ci->ci_gmon = p;
	}
}

int
prof_state_toggle(struct cpu_info *ci, int oldstate)
{
	struct gmonparam *gp = ci->ci_gmon;
	int error = 0;

	KERNEL_ASSERT_LOCKED();

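	/*
	 * The caller has already written the new state into gp->state;
	 * if it equals the saved pre-write state, nothing changed.
	 */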
	if (gp->state == oldstate)
		return (0);

	switch (gp->state) {
	case GMON_PROF_ON:
#if !defined(GPROF)
		/*
		 * If this is not a profiling kernel, we need to patch
		 * all symbols that can be instrumented.
		 */
		error = db_prof_enable();
#endif
		if (error == 0) {
			if (++gmon_cpu_count == 1)
				startprofclock(&process0);
			clockintr_advance(&ci->ci_gmonclock, profclock_period);
		}
		break;
	default:
		error = EINVAL;
		gp->state = GMON_PROF_OFF;
		/* FALLTHROUGH */
	case GMON_PROF_OFF:
		clockintr_cancel(&ci->ci_gmonclock);
		if (--gmon_cpu_count == 0)
			stopprofclock(&process0);
#if !defined(GPROF)
		db_prof_disable();
#endif
		break;
	}

	return (error);
}

/*
 * Return kernel profiling information.
 */
int
sysctl_doprof(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *gp = NULL;
	int error, cpuid, op, state;

	/* all sysctl names at this level are name and field */
	if (namelen != 2)
		return (ENOTDIR);		/* overloaded */

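	/*
	 * For example, a reader such as kgmon(8) fetching the state of
	 * CPU 0 passes the mib { CTL_KERN, KERN_PROF, GPROF_STATE, 0 };
	 * only the last two components reach this function.
	 */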
	op = name[0];
	cpuid = name[1];

	CPU_INFO_FOREACH(cii, ci) {
		if (cpuid == CPU_INFO_UNIT(ci)) {
			gp = ci->ci_gmon;
			break;
		}
	}

	if (gp == NULL)
		return (EOPNOTSUPP);

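	/*
	 * mcount() refuses to record anything while gmoninit is zero,
	 * so flipping it on below is what first arms the monitor.
	 */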
	/* Assume that if we're here it is safe to execute profiling. */
	gmoninit = 1;

	switch (op) {
	case GPROF_STATE:
		state = gp->state;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
		if (error)
			return (error);
		return prof_state_toggle(ci, state);
	case GPROF_COUNT:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->kcount, gp->kcountsize));
	case GPROF_FROMS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->froms, gp->fromssize));
	case GPROF_TOS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->tos, gp->tossize));
	case GPROF_GMONPARAM:
		return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

void
gmonclock(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct clockframe *frame = cf;
	struct gmonparam *g = curcpu()->ci_gmon;
	u_long i;

	count = clockrequest_advance(cr, profclock_period);
	if (count > ULONG_MAX)
		count = ULONG_MAX;

	/*
	 * Kernel statistics are just like addupc_intr(), only easier.
	 */
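	/*
	 * Every HISTFRACTION * sizeof(*g->kcount) bytes of kernel text
	 * share one histogram bucket; in effect this is PC_TO_INDEX()
	 * below with the scale fixed at 1/HISTFRACTION.
	 */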
	if (!CLKF_USERMODE(frame) && g != NULL && g->state == GMON_PROF_ON) {
		i = CLKF_PC(frame) - g->lowpc;
		if (i < g->textsize) {
			i /= HISTFRACTION * sizeof(*g->kcount);
			g->kcount[i] += (u_long)count;
		}
	}
}

#endif /* GPROF || DDBPROF */

/*
 * Profiling system call.
 *
 * The scale factor is a fixed point number with 16 bits of fraction, so that
 * 1.0 is represented as 0x10000.  A scale factor of 0 turns off profiling.
 */
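/*
 * For example (illustrative values): a scale of 0x8000 (0.5) maps every
 * four bytes of text onto one 16-bit counter, so a call such as
 *
 *	profil(buf, textsize / 2, lowpc, 0x8000);
 *
 * counts ticks landing in [lowpc, lowpc + textsize) into buf, which must
 * hold textsize / 4 u_short counters (textsize / 2 bytes).
 */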
int
sys_profil(struct proc *p, void *v, register_t *retval)
{
	struct sys_profil_args /* {
		syscallarg(caddr_t) samples;
		syscallarg(size_t) size;
		syscallarg(u_long) offset;
		syscallarg(u_int) scale;
	} */ *uap = v;
	struct process *pr = p->p_p;
	struct uprof *upp;
	int error, s;

	error = pledge_profil(p, SCARG(uap, scale));
	if (error)
		return error;

	if (SCARG(uap, scale) > (1 << 16))
		return (EINVAL);
	if (SCARG(uap, scale) == 0) {
		stopprofclock(pr);
		need_resched(curcpu());
		return (0);
	}
	upp = &pr->ps_prof;

	/* Block profile interrupts while changing state. */
	s = splstatclock();
	upp->pr_off = SCARG(uap, offset);
	upp->pr_scale = SCARG(uap, scale);
	upp->pr_base = (caddr_t)SCARG(uap, samples);
	upp->pr_size = SCARG(uap, size);
	startprofclock(pr);
	splx(s);
	need_resched(curcpu());

	return (0);
}

void
profclock(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct clockframe *frame = cf;
	struct proc *p = curproc;

	count = clockrequest_advance(cr, profclock_period);
	if (count > ULONG_MAX)
		count = ULONG_MAX;

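	/*
	 * A tick that lands in the kernel is charged to the process's
	 * saved user pc (PROC_PC) instead, so the samples always point
	 * back into the process's own text.
	 */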
	if (CLKF_USERMODE(frame)) {
		if (ISSET(p->p_p->ps_flags, PS_PROFIL))
			addupc_intr(p, CLKF_PC(frame), (u_long)count);
	} else {
		if (p != NULL && ISSET(p->p_p->ps_flags, PS_PROFIL))
			addupc_intr(p, PROC_PC(p), (u_long)count);
	}
}

/*
 * Scale is a fixed-point number with the binary point 16 bits
 * into the value, and is <= 1.0.  pc is at most 32 bits, so the
 * intermediate result is at most 48 bits.
 */
#define	PC_TO_INDEX(pc, prof) \
	((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
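/*
 * Worked example (illustrative values): with pr_off 0 and pr_scale
 * 0x8000, a pc of 0x1004 yields ((0x1004 * 0x8000) >> 16) & ~1 = 0x802,
 * the even byte offset of the u_short counter covering that pc.
 */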

/*
 * Collect user-level profiling statistics; called on a profiling tick,
 * when a process is running in user-mode.  This routine may be called
 * from an interrupt context. Schedule an AST that will vector us to
 * trap() with a context in which copyin and copyout will work.
 * Trap will then call addupc_task().
 */
void
addupc_intr(struct proc *p, u_long pc, u_long nticks)
{
	struct uprof *prof;

	prof = &p->p_p->ps_prof;
	if (pc < prof->pr_off || PC_TO_INDEX(pc, prof) >= prof->pr_size)
		return;			/* out of range; ignore */

	p->p_prof_addr = pc;
	p->p_prof_ticks += nticks;
	atomic_setbits_int(&p->p_flag, P_OWEUPC);
	need_proftick(p);
}


/*
 * Much like before, but we can afford to take faults here.  If the
 * update fails, we simply turn off profiling.
 */
void
addupc_task(struct proc *p, u_long pc, u_int nticks)
{
	struct process *pr = p->p_p;
	struct uprof *prof;
	caddr_t addr;
	u_int i;
	u_short v;

	/* Testing PS_PROFIL may be unnecessary, but is certainly safe. */
	if ((pr->ps_flags & PS_PROFIL) == 0 || nticks == 0)
		return;

	prof = &pr->ps_prof;
	if (pc < prof->pr_off ||
	    (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
		return;

	addr = prof->pr_base + i;
	if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) {
		v += nticks;
		if (copyout((caddr_t)&v, addr, sizeof(v)) == 0)
			return;
	}
	stopprofclock(pr);
}
388