xref: /illumos-gate/usr/src/uts/common/dtrace/profile.c (revision 7c478bd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/errno.h>
30 #include <sys/stat.h>
31 #include <sys/modctl.h>
32 #include <sys/conf.h>
33 #include <sys/systm.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/cpuvar.h>
37 #include <sys/kmem.h>
38 #include <sys/strsubr.h>
39 #include <sys/dtrace.h>
40 #include <sys/cyclic.h>
41 #include <sys/atomic.h>
42 
43 static dev_info_t *profile_devi;
44 static dtrace_provider_id_t profile_id;
45 
46 /*
47  * Regardless of platform, there are five artificial frames in the case of the
48  * profile provider:
49  *
50  *	profile_fire
51  *	cyclic_expire
52  *	cyclic_fire
53  *	[ cbe ]
54  *	[ locore ]
55  *
56  * On amd64, there are two frames associated with locore:  one in locore, and
57  * another in common interrupt dispatch code.  (i386 has not been modified to
58  * use this common layer.)  Further, on i386, the interrupted instruction
59  * appears as its own stack frame.  All of this means that we need to add one
60  * frame for amd64, and then take one away for both amd64 and i386.
61  *
62  * On SPARC, the picture is further complicated because the compiler
63  * optimizes away tail-calls -- so the following frames are optimized away:
64  *
65  * 	profile_fire
66  *	cyclic_expire
67  *
68  * This gives three frames.  However, on DEBUG kernels, the cyclic_expire
69  * frame cannot be tail-call eliminated, yielding four frames in this case.
70  *
71  * All of the above constraints lead to the mess below.  Yes, the profile
72  * provider should ideally figure this out on-the-fly by hitting one of its own
73  * probes and then walking its own stack trace.  This is complicated, however,
74  * and the static definition doesn't seem to be overly brittle.  Still, we
75  * allow for a manual override in case we get it completely wrong.
76  */
/*
 * Number of artificial stack frames handed to dtrace_probe_create() for each
 * profile probe, chosen per architecture.  No value is defined for any
 * architecture other than the three listed here.
 */
#if defined(__amd64)
#define	PROF_ARTIFICIAL_FRAMES	7
#elif defined(__i386)
#define	PROF_ARTIFICIAL_FRAMES	6
#elif defined(__sparc)
#if defined(DEBUG)
#define	PROF_ARTIFICIAL_FRAMES	4
#else
#define	PROF_ARTIFICIAL_FRAMES	3
#endif
#endif

/* Size, in bytes, of the probe-name buffers used by this provider. */
#define	PROF_NAMELEN		15

/* Probe kinds and the probe-name prefix that selects each of them. */
#define	PROF_PROFILE		0
#define	PROF_TICK		1
#define	PROF_PREFIX_PROFILE	"profile-"
#define	PROF_PREFIX_TICK	"tick-"
99 
/*
 * State kept for each probe created by this provider.  One of these is
 * allocated in profile_create() and passed to dtrace_probe_create() as the
 * probe's private argument.
 */
typedef struct profile_probe {
	char		prof_name[PROF_NAMELEN];	/* copy of probe name */
	dtrace_id_t	prof_id;	/* id from dtrace_probe_create() */
	int		prof_kind;	/* PROF_PROFILE or PROF_TICK */
	hrtime_t	prof_interval;	/* interval given to the cyclic */
	cyclic_id_t	prof_cyclic;	/* CYCLIC_NONE while not enabled */
} profile_probe_t;
107 
/*
 * State for one handler of a PROF_PROFILE probe's omnipresent cyclic.  It is
 * allocated in profile_online() and released in profile_offline().
 */
typedef struct profile_probe_percpu {
	hrtime_t	profc_expected;	/* time the next firing is expected */
	hrtime_t	profc_interval;	/* added to profc_expected per firing */
	profile_probe_t	*profc_probe;	/* probe this handler fires */
} profile_probe_percpu_t;
113 
/*
 * Tunables.  profile_create() refuses any probe whose interval is below
 * profile_interval_min.  A non-zero profile_aframes is used in place of
 * PROF_ARTIFICIAL_FRAMES when a probe is created.
 */
hrtime_t	profile_interval_min = NANOSEC / 5000;		/* 5000 hz */
int		profile_aframes = 0;				/* override */
116 
/*
 * Rates for the profile-<rate> probes that are provided when no probe
 * description is given.  The table has twenty slots; those not listed are
 * zero, and zero entries are skipped by profile_provide().
 */
static int profile_rates[20] = {
	97, 199, 499, 997, 1999, 4001, 4999
};
123 
/*
 * Rates for the tick-<rate> probes that are provided when no probe
 * description is given.  The table has fifteen slots; those not listed are
 * zero, and zero entries are skipped by profile_provide().
 */
static int profile_ticks[15] = {
	1, 10, 100, 500, 1000, 5000
};
129 
130 /*
131  * profile_max defines the upper bound on the number of profile probes that
132  * can exist (this is to prevent malicious or clumsy users from exhausting
133  * system resources by creating a slew of profile probes). At mod load time,
134  * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
135  * present in the profile.conf file.
136  */
/* Limit used when the "profile-max-probes" property is not set. */
#define	PROFILE_MAX_DEFAULT	1000	/* default max. number of probes */
/* Assigned in profile_attach(); compared against in profile_create(). */
static uint32_t profile_max;		/* maximum number of profile probes */
/* Adjusted atomically by profile_create() and profile_destroy(). */
static uint32_t profile_total;	/* current number of profile probes */
140 
141 static void
142 profile_fire(void *arg)
143 {
144 	profile_probe_percpu_t *pcpu = arg;
145 	profile_probe_t *prof = pcpu->profc_probe;
146 	hrtime_t late;
147 
148 	late = dtrace_gethrtime() - pcpu->profc_expected;
149 	pcpu->profc_expected += pcpu->profc_interval;
150 
151 	dtrace_probe(prof->prof_id, CPU->cpu_profile_pc,
152 	    CPU->cpu_profile_upc, late, 0, 0);
153 }
154 
155 static void
156 profile_tick(void *arg)
157 {
158 	profile_probe_t *prof = arg;
159 
160 	dtrace_probe(prof->prof_id, CPU->cpu_profile_pc,
161 	    CPU->cpu_profile_upc, 0, 0, 0);
162 }
163 
164 static void
165 profile_create(hrtime_t interval, const char *name, int kind)
166 {
167 	profile_probe_t *prof;
168 
169 	if (interval < profile_interval_min)
170 		return;
171 
172 	if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
173 		return;
174 
175 	atomic_add_32(&profile_total, 1);
176 	if (profile_total > profile_max) {
177 		atomic_add_32(&profile_total, -1);
178 		return;
179 	}
180 
181 	prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
182 	(void) strcpy(prof->prof_name, name);
183 	prof->prof_interval = interval;
184 	prof->prof_cyclic = CYCLIC_NONE;
185 	prof->prof_kind = kind;
186 	prof->prof_id = dtrace_probe_create(profile_id,
187 	    NULL, NULL, name,
188 	    profile_aframes ? profile_aframes : PROF_ARTIFICIAL_FRAMES, prof);
189 }
190 
191 /*ARGSUSED*/
192 static void
193 profile_provide(void *arg, const dtrace_probedesc_t *desc)
194 {
195 	int i, j, rate, kind;
196 	hrtime_t val = 0, mult = 1, len;
197 	const char *name, *suffix = NULL;
198 
199 	const struct {
200 		char *prefix;
201 		int kind;
202 	} types[] = {
203 		{ PROF_PREFIX_PROFILE, PROF_PROFILE },
204 		{ PROF_PREFIX_TICK, PROF_TICK },
205 		{ NULL, NULL }
206 	};
207 
208 	const struct {
209 		char *name;
210 		hrtime_t mult;
211 	} suffixes[] = {
212 		{ "ns", 	NANOSEC / NANOSEC },
213 		{ "nsec",	NANOSEC / NANOSEC },
214 		{ "us",		NANOSEC / MICROSEC },
215 		{ "usec",	NANOSEC / MICROSEC },
216 		{ "ms",		NANOSEC / MILLISEC },
217 		{ "msec",	NANOSEC / MILLISEC },
218 		{ "s",		NANOSEC / SEC },
219 		{ "sec",	NANOSEC / SEC },
220 		{ "m",		NANOSEC * (hrtime_t)60 },
221 		{ "min",	NANOSEC * (hrtime_t)60 },
222 		{ "h",		NANOSEC * (hrtime_t)(60 * 60) },
223 		{ "hour",	NANOSEC * (hrtime_t)(60 * 60) },
224 		{ "d",		NANOSEC * (hrtime_t)(24 * 60 * 60) },
225 		{ "day",	NANOSEC * (hrtime_t)(24 * 60 * 60) },
226 		{ "hz",		0 },
227 		{ NULL }
228 	};
229 
230 	if (desc == NULL) {
231 		char n[PROF_NAMELEN];
232 
233 		/*
234 		 * If no description was provided, provide all of our probes.
235 		 */
236 		for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) {
237 			if ((rate = profile_rates[i]) == 0)
238 				continue;
239 
240 			(void) snprintf(n, PROF_NAMELEN, "%s%d",
241 			    PROF_PREFIX_PROFILE, rate);
242 			profile_create(NANOSEC / rate, n, PROF_PROFILE);
243 		}
244 
245 		for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) {
246 			if ((rate = profile_ticks[i]) == 0)
247 				continue;
248 
249 			(void) snprintf(n, PROF_NAMELEN, "%s%d",
250 			    PROF_PREFIX_TICK, rate);
251 			profile_create(NANOSEC / rate, n, PROF_TICK);
252 		}
253 
254 		return;
255 	}
256 
257 	name = desc->dtpd_name;
258 
259 	for (i = 0; types[i].prefix != NULL; i++) {
260 		len = strlen(types[i].prefix);
261 
262 		if (strncmp(name, types[i].prefix, len) != 0)
263 			continue;
264 		break;
265 	}
266 
267 	if (types[i].prefix == NULL)
268 		return;
269 
270 	kind = types[i].kind;
271 	j = strlen(name) - len;
272 
273 	/*
274 	 * We need to start before any time suffix.
275 	 */
276 	for (j = strlen(name); j >= len; j--) {
277 		if (name[j] >= '0' && name[j] <= '9')
278 			break;
279 		suffix = &name[j];
280 	}
281 
282 	ASSERT(suffix != NULL);
283 
284 	/*
285 	 * Now determine the numerical value present in the probe name.
286 	 */
287 	for (; j >= len; j--) {
288 		if (name[j] < '0' || name[j] > '9')
289 			return;
290 
291 		val += (name[j] - '0') * mult;
292 		mult *= (hrtime_t)10;
293 	}
294 
295 	if (val == 0)
296 		return;
297 
298 	/*
299 	 * Look-up the suffix to determine the multiplier.
300 	 */
301 	for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
302 		if (strcasecmp(suffixes[i].name, suffix) == 0) {
303 			mult = suffixes[i].mult;
304 			break;
305 		}
306 	}
307 
308 	if (suffixes[i].name == NULL && *suffix != '\0')
309 		return;
310 
311 	if (mult == 0) {
312 		/*
313 		 * The default is frequency-per-second.
314 		 */
315 		val = NANOSEC / val;
316 	} else {
317 		val *= mult;
318 	}
319 
320 	profile_create(val, name, kind);
321 }
322 
323 /*ARGSUSED*/
324 static void
325 profile_destroy(void *arg, dtrace_id_t id, void *parg)
326 {
327 	profile_probe_t *prof = parg;
328 
329 	ASSERT(prof->prof_cyclic == CYCLIC_NONE);
330 	kmem_free(prof, sizeof (profile_probe_t));
331 
332 	ASSERT(profile_total >= 1);
333 	atomic_add_32(&profile_total, -1);
334 }
335 
336 /*ARGSUSED*/
337 static void
338 profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
339 {
340 	profile_probe_t *prof = arg;
341 	profile_probe_percpu_t *pcpu;
342 
343 	pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP);
344 	pcpu->profc_probe = prof;
345 
346 	hdlr->cyh_func = profile_fire;
347 	hdlr->cyh_arg = pcpu;
348 	hdlr->cyh_level = CY_HIGH_LEVEL;
349 
350 	when->cyt_interval = prof->prof_interval;
351 	when->cyt_when = dtrace_gethrtime() + when->cyt_interval;
352 
353 	pcpu->profc_expected = when->cyt_when;
354 	pcpu->profc_interval = when->cyt_interval;
355 }
356 
357 /*ARGSUSED*/
358 static void
359 profile_offline(void *arg, cpu_t *cpu, void *oarg)
360 {
361 	profile_probe_percpu_t *pcpu = oarg;
362 
363 	ASSERT(pcpu->profc_probe == arg);
364 	kmem_free(pcpu, sizeof (profile_probe_percpu_t));
365 }
366 
367 /*ARGSUSED*/
368 static void
369 profile_enable(void *arg, dtrace_id_t id, void *parg)
370 {
371 	profile_probe_t *prof = parg;
372 	cyc_omni_handler_t omni;
373 	cyc_handler_t hdlr;
374 	cyc_time_t when;
375 
376 	ASSERT(prof->prof_interval != 0);
377 	ASSERT(MUTEX_HELD(&cpu_lock));
378 
379 	if (prof->prof_kind == PROF_TICK) {
380 		hdlr.cyh_func = profile_tick;
381 		hdlr.cyh_arg = prof;
382 		hdlr.cyh_level = CY_HIGH_LEVEL;
383 
384 		when.cyt_interval = prof->prof_interval;
385 		when.cyt_when = dtrace_gethrtime() + when.cyt_interval;
386 	} else {
387 		ASSERT(prof->prof_kind == PROF_PROFILE);
388 		omni.cyo_online = profile_online;
389 		omni.cyo_offline = profile_offline;
390 		omni.cyo_arg = prof;
391 	}
392 
393 	if (prof->prof_kind == PROF_TICK) {
394 		prof->prof_cyclic = cyclic_add(&hdlr, &when);
395 	} else {
396 		prof->prof_cyclic = cyclic_add_omni(&omni);
397 	}
398 }
399 
400 /*ARGSUSED*/
401 static void
402 profile_disable(void *arg, dtrace_id_t id, void *parg)
403 {
404 	profile_probe_t *prof = parg;
405 
406 	ASSERT(prof->prof_cyclic != CYCLIC_NONE);
407 	ASSERT(MUTEX_HELD(&cpu_lock));
408 
409 	cyclic_remove(prof->prof_cyclic);
410 	prof->prof_cyclic = CYCLIC_NONE;
411 }
412 
413 /*ARGSUSED*/
414 static int
415 profile_usermode(void *arg, dtrace_id_t id, void *parg)
416 {
417 	return (CPU->cpu_profile_pc == 0);
418 }
419 
/*
 * Stability attributes passed to dtrace_register().  The five rows are, in
 * order, the attributes of the provider, module, function, name and
 * arguments (see dtrace_pattr_t in <sys/dtrace.h>).
 */
static dtrace_pattr_t profile_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};
427 
/*
 * Provider operations passed to dtrace_register().  Entries are positional;
 * the slot names below follow dtrace_pops_t in <sys/dtrace.h>.  NULL marks an
 * operation this provider does not supply.
 */
static dtrace_pops_t profile_pops = {
	profile_provide,	/* dtps_provide */
	NULL,			/* dtps_provide_module */
	profile_enable,		/* dtps_enable */
	profile_disable,	/* dtps_disable */
	NULL,			/* dtps_suspend */
	NULL,			/* dtps_resume */
	NULL,			/* dtps_getargdesc */
	NULL,			/* dtps_getargval */
	profile_usermode,	/* dtps_usermode */
	profile_destroy		/* dtps_destroy */
};
440 
441 static int
442 profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
443 {
444 	switch (cmd) {
445 	case DDI_ATTACH:
446 		break;
447 	case DDI_RESUME:
448 		return (DDI_SUCCESS);
449 	default:
450 		return (DDI_FAILURE);
451 	}
452 
453 	if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0,
454 	    DDI_PSEUDO, NULL) == DDI_FAILURE ||
455 	    dtrace_register("profile", &profile_attr,
456 	    DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, 0,
457 	    &profile_pops, NULL, &profile_id) != 0) {
458 		ddi_remove_minor_node(devi, NULL);
459 		return (DDI_FAILURE);
460 	}
461 
462 	profile_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
463 	    "profile-max-probes", PROFILE_MAX_DEFAULT);
464 
465 	ddi_report_dev(devi);
466 	profile_devi = devi;
467 	return (DDI_SUCCESS);
468 }
469 
470 static int
471 profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
472 {
473 	switch (cmd) {
474 	case DDI_DETACH:
475 		break;
476 	case DDI_SUSPEND:
477 		return (DDI_SUCCESS);
478 	default:
479 		return (DDI_FAILURE);
480 	}
481 
482 	if (dtrace_unregister(profile_id) != 0)
483 		return (DDI_FAILURE);
484 
485 	ddi_remove_minor_node(devi, NULL);
486 	return (DDI_SUCCESS);
487 }
488 
489 /*ARGSUSED*/
490 static int
491 profile_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
492 {
493 	int error;
494 
495 	switch (infocmd) {
496 	case DDI_INFO_DEVT2DEVINFO:
497 		*result = (void *)profile_devi;
498 		error = DDI_SUCCESS;
499 		break;
500 	case DDI_INFO_DEVT2INSTANCE:
501 		*result = (void *)0;
502 		error = DDI_SUCCESS;
503 		break;
504 	default:
505 		error = DDI_FAILURE;
506 	}
507 	return (error);
508 }
509 
510 /*ARGSUSED*/
511 static int
512 profile_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
513 {
514 	return (0);
515 }
516 
/*
 * Character/block entry points.  profile_open() is the only routine supplied
 * by this file; every other slot is filled with nodev, nulldev, nochpoll or
 * ddi_prop_op.
 */
static struct cb_ops profile_cb_ops = {
	profile_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
534 
/*
 * Device operations:  wires profile_info(), profile_attach() and
 * profile_detach() to the driver framework and points at profile_cb_ops for
 * the character-device entry points.
 */
static struct dev_ops profile_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	profile_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	profile_attach,		/* attach */
	profile_detach,		/* detach */
	nodev,			/* reset */
	&profile_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};
548 
549 /*
550  * Module linkage information for the kernel.
551  */
/* Describes this module as a driver whose operations are profile_ops. */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Profile Interrupt Tracing",	/* name of module */
	&profile_ops,		/* driver ops */
};

/* Linkage handed to mod_install(), mod_info() and mod_remove() below. */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
563 
564 int
565 _init(void)
566 {
567 	return (mod_install(&modlinkage));
568 }
569 
570 int
571 _info(struct modinfo *modinfop)
572 {
573 	return (mod_info(&modlinkage, modinfop));
574 }
575 
576 int
577 _fini(void)
578 {
579 	return (mod_remove(&modlinkage));
580 }
581