/*	$OpenBSD: subr_prof.c,v 1.41 2024/01/24 19:23:38 cheloha Exp $	*/
/*	$NetBSD: subr_prof.c,v 1.12 1996/04/22 01:38:50 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)subr_prof.c	8.3 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/clockintr.h>
#include <sys/pledge.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/user.h>

uint64_t profclock_period;

#if defined(GPROF) || defined(DDBPROF)
#include <sys/malloc.h>
#include <sys/gmon.h>

#include <uvm/uvm_extern.h>

#include <machine/db_machdep.h>
#include <ddb/db_extern.h>

/*
 * Flag to prevent CPUs from executing the mcount() monitor function
 * until we're sure they are in a sane state.
 */
int gmoninit = 0;
u_int gmon_cpu_count;		/* [K] number of CPUs with profiling enabled */

extern char etext[];

void gmonclock(struct clockrequest *, void *, void *);

void
prof_init(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *p;
	u_long lowpc, highpc, textsize;
	u_long kcountsize, fromssize, tossize;
	long tolimit;
	char *cp;
	int size;

	/*
	 * Round lowpc and highpc to multiples of the density we're using
	 * so the rest of the scaling (here and in gprof) stays in ints.
	 */
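	/*
	 * For example, with the common HISTFRACTION of 2 and a 2-byte
	 * HISTCOUNTER (both are machine-dependent), lowpc and highpc
	 * are rounded to multiples of 4, so textsize divides evenly
	 * into histogram buckets.
	 */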
	lowpc = ROUNDDOWN(KERNBASE, HISTFRACTION * sizeof(HISTCOUNTER));
	highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
	textsize = highpc - lowpc;
#ifdef GPROF
	printf("Profiling kernel, textsize=%ld [%lx..%lx]\n",
	    textsize, lowpc, highpc);
#endif
	kcountsize = textsize / HISTFRACTION;
	fromssize = textsize / HASHFRACTION;
	tolimit = textsize * ARCDENSITY / 100;
	if (tolimit < MINARCS)
		tolimit = MINARCS;
	else if (tolimit > MAXARCS)
		tolimit = MAXARCS;
	tossize = tolimit * sizeof(struct tostruct);
	size = sizeof(*p) + kcountsize + fromssize + tossize;
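	/*
	 * Each per-CPU buffer is carved up below in this order: the
	 * struct gmonparam header, the tos[] arc array, the kcount[]
	 * histogram, and the froms[] hash table.
	 */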

	/* Allocate and initialize one profiling buffer per CPU. */
	CPU_INFO_FOREACH(cii, ci) {
		cp = km_alloc(round_page(size), &kv_any, &kp_zero, &kd_nowait);
		if (cp == NULL) {
			printf("No memory for profiling.\n");
			return;
		}

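		/*
		 * Bind a profiling clock to this CPU and stagger the
		 * CPUs so their gmonclock interrupts don't all expire
		 * at the same moment.
		 */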
		clockintr_bind(&ci->ci_gmonclock, ci, gmonclock, NULL);
		clockintr_stagger(&ci->ci_gmonclock, profclock_period,
		    CPU_INFO_UNIT(ci), MAXCPUS);

		p = (struct gmonparam *)cp;
		cp += sizeof(*p);
		p->tos = (struct tostruct *)cp;
		cp += tossize;
		p->kcount = (u_short *)cp;
		cp += kcountsize;
		p->froms = (u_short *)cp;

		p->state = GMON_PROF_OFF;
		p->lowpc = lowpc;
		p->highpc = highpc;
		p->textsize = textsize;
		p->hashfraction = HASHFRACTION;
		p->kcountsize = kcountsize;
		p->fromssize = fromssize;
		p->tolimit = tolimit;
		p->tossize = tossize;

		ci->ci_gmon = p;
	}
}

int
prof_state_toggle(struct cpu_info *ci, int oldstate)
{
	struct gmonparam *gp = ci->ci_gmon;
	int error = 0;

	KERNEL_ASSERT_LOCKED();

	if (gp->state == oldstate)
		return (0);

	switch (gp->state) {
	case GMON_PROF_ON:
#if !defined(GPROF)
		/*
		 * If this is not a profiling kernel, we need to patch
		 * all symbols that can be instrumented.
		 */
		error = db_prof_enable();
#endif
		if (error == 0) {
			if (++gmon_cpu_count == 1)
				startprofclock(&process0);
			clockintr_advance(&ci->ci_gmonclock, profclock_period);
		}
		break;
	default:
		error = EINVAL;
		gp->state = GMON_PROF_OFF;
		/* FALLTHROUGH */
	case GMON_PROF_OFF:
		clockintr_cancel(&ci->ci_gmonclock);
		if (--gmon_cpu_count == 0)
			stopprofclock(&process0);
#if !defined(GPROF)
		db_prof_disable();
#endif
		break;
	}

	return (error);
}

/*
 * Return kernel profiling information.
 */
int
sysctl_doprof(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *gp = NULL;
	int error, cpuid, op, state;

	/* all sysctl names at this level are name and field */
	if (namelen != 2)
		return (ENOTDIR);		/* overloaded */

	op = name[0];
	cpuid = name[1];

	CPU_INFO_FOREACH(cii, ci) {
		if (cpuid == CPU_INFO_UNIT(ci)) {
			gp = ci->ci_gmon;
			break;
		}
	}

	if (gp == NULL)
		return (EOPNOTSUPP);

	/* Assume that if we're here it is safe to execute profiling. */
	gmoninit = 1;

	switch (op) {
	case GPROF_STATE:
		state = gp->state;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
		if (error)
			return (error);
		return prof_state_toggle(ci, state);
	case GPROF_COUNT:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->kcount, gp->kcountsize));
	case GPROF_FROMS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->froms, gp->fromssize));
	case GPROF_TOS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->tos, gp->tossize));
	case GPROF_GMONPARAM:
		return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

void
gmonclock(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct clockframe *frame = cf;
	struct gmonparam *g = curcpu()->ci_gmon;
	u_long i;

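	/*
	 * clockrequest_advance() returns how many periods have elapsed;
	 * clamp it so the cast to u_long below cannot truncate on
	 * platforms where u_long is 32 bits wide.
	 */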
	count = clockrequest_advance(cr, profclock_period);
	if (count > ULONG_MAX)
		count = ULONG_MAX;

	/*
	 * Kernel statistics are just like addupc_intr(), only easier.
	 */
	if (!CLKF_USERMODE(frame) && g != NULL && g->state == GMON_PROF_ON) {
		i = CLKF_PC(frame) - g->lowpc;
		if (i < g->textsize) {
			i /= HISTFRACTION * sizeof(*g->kcount);
			g->kcount[i] += (u_long)count;
		}
	}
}

#endif /* GPROF || DDBPROF */

/*
 * Profiling system call.
 *
 * The scale factor is a fixed point number with 16 bits of fraction, so that
 * 1.0 is represented as 0x10000.  A scale factor of 0 turns off profiling.
 */
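/*
 * For example, a scale of 0x10000 (1.0) maps the profiled region onto
 * the sample buffer one-to-one, while 0x8000 (0.5) folds every two
 * bytes of pc range onto one byte of buffer.  A userland caller might
 * look something like this sketch, where 'text_start' and the buffer
 * size are placeholders:
 *
 *	u_short samples[8192];
 *	profil((char *)samples, sizeof(samples), text_start, 0x8000);
 */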
int
sys_profil(struct proc *p, void *v, register_t *retval)
{
	struct sys_profil_args /* {
		syscallarg(caddr_t) samples;
		syscallarg(size_t) size;
		syscallarg(u_long) offset;
		syscallarg(u_int) scale;
	} */ *uap = v;
	struct process *pr = p->p_p;
	struct uprof *upp;
	int error, s;

	error = pledge_profil(p, SCARG(uap, scale));
	if (error)
		return error;

	if (SCARG(uap, scale) > (1 << 16))
		return (EINVAL);
	if (SCARG(uap, scale) == 0) {
		stopprofclock(pr);
		need_resched(curcpu());
		return (0);
	}
	upp = &pr->ps_prof;

	/* Block profile interrupts while changing state. */
	s = splstatclock();
	upp->pr_off = SCARG(uap, offset);
	upp->pr_scale = SCARG(uap, scale);
	upp->pr_base = (caddr_t)SCARG(uap, samples);
	upp->pr_size = SCARG(uap, size);
	startprofclock(pr);
	splx(s);
	need_resched(curcpu());

	return (0);
}

void
profclock(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct clockframe *frame = cf;
	struct proc *p = curproc;

	count = clockrequest_advance(cr, profclock_period);
	if (count > ULONG_MAX)
		count = ULONG_MAX;

	if (CLKF_USERMODE(frame)) {
		if (ISSET(p->p_p->ps_flags, PS_PROFIL))
			addupc_intr(p, CLKF_PC(frame), (u_long)count);
	} else {
		if (p != NULL && ISSET(p->p_p->ps_flags, PS_PROFIL))
			addupc_intr(p, PROC_PC(p), (u_long)count);
	}
}

/*
 * Scale is a fixed-point number with the binary point 16 bits
 * into the value, and is <= 1.0.  pc is at most 32 bits, so the
 * intermediate result is at most 48 bits.
 */
#define	PC_TO_INDEX(pc, prof) \
	((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
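/*
 * Worked example: with pc - pr_off == 0x1234 and pr_scale == 0x8000,
 * the macro yields ((0x1234 * 0x8000) >> 16) & ~1 == 0x91a, the byte
 * offset of a u_short counter in the sample buffer; the & ~1 keeps
 * the offset aligned to the 2-byte counter size.
 */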

/*
 * Collect user-level profiling statistics; called on a profiling tick,
 * when a process is running in user-mode.  This routine may be called
 * from an interrupt context.  Schedule an AST that will vector us to
 * trap() with a context in which copyin and copyout will work.
 * Trap will then call addupc_task().
 */
void
addupc_intr(struct proc *p, u_long pc, u_long nticks)
{
	struct uprof *prof;

	prof = &p->p_p->ps_prof;
	if (pc < prof->pr_off || PC_TO_INDEX(pc, prof) >= prof->pr_size)
		return;			/* out of range; ignore */

	p->p_prof_addr = pc;
	p->p_prof_ticks += nticks;
	atomic_setbits_int(&p->p_flag, P_OWEUPC);
	need_proftick(p);
}

/*
 * Much like before, but we can afford to take faults here.  If the
 * update fails, we simply turn off profiling.
 */
void
addupc_task(struct proc *p, u_long pc, u_int nticks)
{
	struct process *pr = p->p_p;
	struct uprof *prof;
	caddr_t addr;
	u_int i;
	u_short v;

	/* Testing PS_PROFIL may be unnecessary, but is certainly safe. */
	if ((pr->ps_flags & PS_PROFIL) == 0 || nticks == 0)
		return;

	prof = &pr->ps_prof;
	if (pc < prof->pr_off ||
	    (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
		return;

	addr = prof->pr_base + i;
	if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) {
		v += nticks;
		if (copyout((caddr_t)&v, addr, sizeof(v)) == 0)
			return;
	}
	stopprofclock(pr);
}