xref: /illumos-gate/usr/src/uts/common/dtrace/profile.c (revision 06e1a714)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/errno.h>
29 #include <sys/stat.h>
30 #include <sys/modctl.h>
31 #include <sys/conf.h>
32 #include <sys/systm.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/cpuvar.h>
36 #include <sys/kmem.h>
37 #include <sys/strsubr.h>
38 #include <sys/dtrace.h>
39 #include <sys/cyclic.h>
40 #include <sys/atomic.h>
41 
42 static dev_info_t *profile_devi;
43 static dtrace_provider_id_t profile_id;
44 
45 /*
46  * Regardless of platform, there are five artificial frames in the case of the
47  * profile provider:
48  *
49  *	profile_fire
50  *	cyclic_expire
51  *	cyclic_fire
52  *	[ cbe ]
53  *	[ locore ]
54  *
55  * On amd64, there are two frames associated with locore:  one in locore, and
56  * another in common interrupt dispatch code.  (i386 has not been modified to
57  * use this common layer.)  Further, on i386, the interrupted instruction
58  * appears as its own stack frame.  All of this means that we need to add one
59  * frame for amd64, and then take one away for both amd64 and i386.
60  *
61  * On SPARC, the picture is further complicated because the compiler
62  * optimizes away tail-calls -- so the following frames are optimized away:
63  *
64  * 	profile_fire
65  *	cyclic_expire
66  *
67  * This gives three frames.  However, on DEBUG kernels, the cyclic_expire
68  * frame cannot be tail-call eliminated, yielding four frames in this case.
69  *
70  * All of the above constraints lead to the mess below.  Yes, the profile
 * provider should ideally figure this out on-the-fly by hitting one of its own
72  * probes and then walking its own stack trace.  This is complicated, however,
73  * and the static definition doesn't seem to be overly brittle.  Still, we
74  * allow for a manual override in case we get it completely wrong.
75  */
/*
 * Per-platform count of artificial stack frames for profile probes.  SPARC
 * needs one more frame on DEBUG kernels than on non-DEBUG kernels.  No value
 * is defined for any platform other than amd64, i386 and SPARC.
 */
#if defined(__amd64)
#define	PROF_ARTIFICIAL_FRAMES	7
#elif defined(__i386)
#define	PROF_ARTIFICIAL_FRAMES	6
#elif defined(__sparc) && defined(DEBUG)
#define	PROF_ARTIFICIAL_FRAMES	4
#elif defined(__sparc)
#define	PROF_ARTIFICIAL_FRAMES	3
#endif
91 
/* Size, in bytes, of the prof_name buffer in profile_probe_t. */
#define	PROF_NAMELEN		15

/*
 * Probe kinds and the probe-name prefixes that select them.  A PROF_TICK
 * probe is driven by a single cyclic; a PROF_PROFILE probe is driven by an
 * omnipresent cyclic (see profile_enable()).
 */
#define	PROF_PROFILE		0
#define	PROF_TICK		1
#define	PROF_PREFIX_PROFILE	"profile-"
#define	PROF_PREFIX_TICK	"tick-"
98 
/*
 * Per-probe state.  One of these is allocated by profile_create() for each
 * probe and passed to dtrace_probe_create() as the probe's private argument;
 * it is freed in profile_destroy().
 */
typedef struct profile_probe {
	char		prof_name[PROF_NAMELEN];	/* copy of probe name */
	dtrace_id_t	prof_id;	/* id from dtrace_probe_create() */
	int		prof_kind;	/* PROF_PROFILE or PROF_TICK */
	hrtime_t	prof_interval;	/* firing interval, in nanoseconds */
	cyclic_id_t	prof_cyclic;	/* CYCLIC_NONE unless enabled */
} profile_probe_t;
106 
/*
 * State handed to profile_fire() for a PROF_PROFILE probe.  It is allocated
 * in profile_online() and freed in profile_offline().
 */
typedef struct profile_probe_percpu {
	hrtime_t	profc_expected;	/* time the next firing is expected */
	hrtime_t	profc_interval;	/* interval between firings */
	profile_probe_t	*profc_probe;	/* probe this state belongs to */
} profile_probe_percpu_t;
112 
/*
 * Tunables.  profile_create() refuses any probe whose interval is shorter
 * than profile_interval_min.  A non-zero profile_aframes replaces
 * PROF_ARTIFICIAL_FRAMES as the artificial frame count passed to
 * dtrace_probe_create().
 */
hrtime_t	profile_interval_min = NANOSEC / 5000;		/* 5000 hz */
int		profile_aframes = 0;				/* override */
115 
/*
 * Rates (in hertz) of the profile-<rate> probes created when
 * profile_provide() is asked to provide everything.  The table has twenty
 * slots; the slots not listed here are zero, and profile_provide() skips
 * zero entries.
 */
static int profile_rates[20] = {
	97,
	199,
	499,
	997,
	1999,
	4001,
	4999
};
122 
/*
 * Rates (in hertz) of the tick-<rate> probes created when profile_provide()
 * is asked to provide everything.  The table has fifteen slots; the slots
 * not listed here are zero, and profile_provide() skips zero entries.
 */
static int profile_ticks[15] = {
	1,
	10,
	100,
	500,
	1000,
	5000
};
128 
129 /*
130  * profile_max defines the upper bound on the number of profile probes that
 * can exist (this is to prevent malicious or clumsy users from exhausting
132  * system resources by creating a slew of profile probes). At mod load time,
133  * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
134  * present in the profile.conf file.
135  */
#define	PROFILE_MAX_DEFAULT	1000	/* default max. number of probes */
/* Set in profile_attach(); compared against profile_total in profile_create(). */
static uint32_t profile_max;		/* maximum number of profile probes */
/* Adjusted with atomic_add_32() in profile_create() and profile_destroy(). */
static uint32_t profile_total;	/* current number of profile probes */
139 
140 static void
141 profile_fire(void *arg)
142 {
143 	profile_probe_percpu_t *pcpu = arg;
144 	profile_probe_t *prof = pcpu->profc_probe;
145 	hrtime_t late;
146 
147 	late = dtrace_gethrtime() - pcpu->profc_expected;
148 	pcpu->profc_expected += pcpu->profc_interval;
149 
150 	dtrace_probe(prof->prof_id, CPU->cpu_profile_pc,
151 	    CPU->cpu_profile_upc, late, 0, 0);
152 }
153 
154 static void
155 profile_tick(void *arg)
156 {
157 	profile_probe_t *prof = arg;
158 
159 	dtrace_probe(prof->prof_id, CPU->cpu_profile_pc,
160 	    CPU->cpu_profile_upc, 0, 0, 0);
161 }
162 
163 static void
164 profile_create(hrtime_t interval, const char *name, int kind)
165 {
166 	profile_probe_t *prof;
167 
168 	if (interval < profile_interval_min)
169 		return;
170 
171 	if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
172 		return;
173 
174 	atomic_add_32(&profile_total, 1);
175 	if (profile_total > profile_max) {
176 		atomic_add_32(&profile_total, -1);
177 		return;
178 	}
179 
180 	prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
181 	(void) strcpy(prof->prof_name, name);
182 	prof->prof_interval = interval;
183 	prof->prof_cyclic = CYCLIC_NONE;
184 	prof->prof_kind = kind;
185 	prof->prof_id = dtrace_probe_create(profile_id,
186 	    NULL, NULL, name,
187 	    profile_aframes ? profile_aframes : PROF_ARTIFICIAL_FRAMES, prof);
188 }
189 
190 /*ARGSUSED*/
191 static void
192 profile_provide(void *arg, const dtrace_probedesc_t *desc)
193 {
194 	int i, j, rate, kind;
195 	hrtime_t val = 0, mult = 1, len;
196 	const char *name, *suffix = NULL;
197 
198 	const struct {
199 		char *prefix;
200 		int kind;
201 	} types[] = {
202 		{ PROF_PREFIX_PROFILE, PROF_PROFILE },
203 		{ PROF_PREFIX_TICK, PROF_TICK },
204 		{ NULL, NULL }
205 	};
206 
207 	const struct {
208 		char *name;
209 		hrtime_t mult;
210 	} suffixes[] = {
211 		{ "ns", 	NANOSEC / NANOSEC },
212 		{ "nsec",	NANOSEC / NANOSEC },
213 		{ "us",		NANOSEC / MICROSEC },
214 		{ "usec",	NANOSEC / MICROSEC },
215 		{ "ms",		NANOSEC / MILLISEC },
216 		{ "msec",	NANOSEC / MILLISEC },
217 		{ "s",		NANOSEC / SEC },
218 		{ "sec",	NANOSEC / SEC },
219 		{ "m",		NANOSEC * (hrtime_t)60 },
220 		{ "min",	NANOSEC * (hrtime_t)60 },
221 		{ "h",		NANOSEC * (hrtime_t)(60 * 60) },
222 		{ "hour",	NANOSEC * (hrtime_t)(60 * 60) },
223 		{ "d",		NANOSEC * (hrtime_t)(24 * 60 * 60) },
224 		{ "day",	NANOSEC * (hrtime_t)(24 * 60 * 60) },
225 		{ "hz",		0 },
226 		{ NULL }
227 	};
228 
229 	if (desc == NULL) {
230 		char n[PROF_NAMELEN];
231 
232 		/*
233 		 * If no description was provided, provide all of our probes.
234 		 */
235 		for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) {
236 			if ((rate = profile_rates[i]) == 0)
237 				continue;
238 
239 			(void) snprintf(n, PROF_NAMELEN, "%s%d",
240 			    PROF_PREFIX_PROFILE, rate);
241 			profile_create(NANOSEC / rate, n, PROF_PROFILE);
242 		}
243 
244 		for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) {
245 			if ((rate = profile_ticks[i]) == 0)
246 				continue;
247 
248 			(void) snprintf(n, PROF_NAMELEN, "%s%d",
249 			    PROF_PREFIX_TICK, rate);
250 			profile_create(NANOSEC / rate, n, PROF_TICK);
251 		}
252 
253 		return;
254 	}
255 
256 	name = desc->dtpd_name;
257 
258 	for (i = 0; types[i].prefix != NULL; i++) {
259 		len = strlen(types[i].prefix);
260 
261 		if (strncmp(name, types[i].prefix, len) != 0)
262 			continue;
263 		break;
264 	}
265 
266 	if (types[i].prefix == NULL)
267 		return;
268 
269 	kind = types[i].kind;
270 	j = strlen(name) - len;
271 
272 	/*
273 	 * We need to start before any time suffix.
274 	 */
275 	for (j = strlen(name); j >= len; j--) {
276 		if (name[j] >= '0' && name[j] <= '9')
277 			break;
278 		suffix = &name[j];
279 	}
280 
281 	ASSERT(suffix != NULL);
282 
283 	/*
284 	 * Now determine the numerical value present in the probe name.
285 	 */
286 	for (; j >= len; j--) {
287 		if (name[j] < '0' || name[j] > '9')
288 			return;
289 
290 		val += (name[j] - '0') * mult;
291 		mult *= (hrtime_t)10;
292 	}
293 
294 	if (val == 0)
295 		return;
296 
297 	/*
298 	 * Look-up the suffix to determine the multiplier.
299 	 */
300 	for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
301 		if (strcasecmp(suffixes[i].name, suffix) == 0) {
302 			mult = suffixes[i].mult;
303 			break;
304 		}
305 	}
306 
307 	if (suffixes[i].name == NULL && *suffix != '\0')
308 		return;
309 
310 	if (mult == 0) {
311 		/*
312 		 * The default is frequency-per-second.
313 		 */
314 		val = NANOSEC / val;
315 	} else {
316 		val *= mult;
317 	}
318 
319 	profile_create(val, name, kind);
320 }
321 
322 /*ARGSUSED*/
323 static void
324 profile_destroy(void *arg, dtrace_id_t id, void *parg)
325 {
326 	profile_probe_t *prof = parg;
327 
328 	ASSERT(prof->prof_cyclic == CYCLIC_NONE);
329 	kmem_free(prof, sizeof (profile_probe_t));
330 
331 	ASSERT(profile_total >= 1);
332 	atomic_add_32(&profile_total, -1);
333 }
334 
335 /*ARGSUSED*/
336 static void
337 profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
338 {
339 	profile_probe_t *prof = arg;
340 	profile_probe_percpu_t *pcpu;
341 
342 	pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP);
343 	pcpu->profc_probe = prof;
344 
345 	hdlr->cyh_func = profile_fire;
346 	hdlr->cyh_arg = pcpu;
347 	hdlr->cyh_level = CY_HIGH_LEVEL;
348 
349 	when->cyt_interval = prof->prof_interval;
350 	when->cyt_when = dtrace_gethrtime() + when->cyt_interval;
351 
352 	pcpu->profc_expected = when->cyt_when;
353 	pcpu->profc_interval = when->cyt_interval;
354 }
355 
356 /*ARGSUSED*/
357 static void
358 profile_offline(void *arg, cpu_t *cpu, void *oarg)
359 {
360 	profile_probe_percpu_t *pcpu = oarg;
361 
362 	ASSERT(pcpu->profc_probe == arg);
363 	kmem_free(pcpu, sizeof (profile_probe_percpu_t));
364 }
365 
366 /*ARGSUSED*/
367 static void
368 profile_enable(void *arg, dtrace_id_t id, void *parg)
369 {
370 	profile_probe_t *prof = parg;
371 	cyc_omni_handler_t omni;
372 	cyc_handler_t hdlr;
373 	cyc_time_t when;
374 
375 	ASSERT(prof->prof_interval != 0);
376 	ASSERT(MUTEX_HELD(&cpu_lock));
377 
378 	if (prof->prof_kind == PROF_TICK) {
379 		hdlr.cyh_func = profile_tick;
380 		hdlr.cyh_arg = prof;
381 		hdlr.cyh_level = CY_HIGH_LEVEL;
382 
383 		when.cyt_interval = prof->prof_interval;
384 		when.cyt_when = dtrace_gethrtime() + when.cyt_interval;
385 	} else {
386 		ASSERT(prof->prof_kind == PROF_PROFILE);
387 		omni.cyo_online = profile_online;
388 		omni.cyo_offline = profile_offline;
389 		omni.cyo_arg = prof;
390 	}
391 
392 	if (prof->prof_kind == PROF_TICK) {
393 		prof->prof_cyclic = cyclic_add(&hdlr, &when);
394 	} else {
395 		prof->prof_cyclic = cyclic_add_omni(&omni);
396 	}
397 }
398 
399 /*ARGSUSED*/
400 static void
401 profile_disable(void *arg, dtrace_id_t id, void *parg)
402 {
403 	profile_probe_t *prof = parg;
404 
405 	ASSERT(prof->prof_cyclic != CYCLIC_NONE);
406 	ASSERT(MUTEX_HELD(&cpu_lock));
407 
408 	cyclic_remove(prof->prof_cyclic);
409 	prof->prof_cyclic = CYCLIC_NONE;
410 }
411 
412 /*ARGSUSED*/
413 static int
414 profile_usermode(void *arg, dtrace_id_t id, void *parg)
415 {
416 	return (CPU->cpu_profile_pc == 0);
417 }
418 
/*
 * Stability attributes handed to dtrace_register() in profile_attach().
 * NOTE(review): the five rows presumably describe, in order, the provider,
 * module, function, name and argument attributes -- confirm against the
 * definition of dtrace_pattr_t.
 */
static dtrace_pattr_t profile_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};
426 
/*
 * Provider operations vector handed to dtrace_register() in
 * profile_attach().  Slots left NULL are operations for which this file
 * supplies no implementation.
 */
static dtrace_pops_t profile_pops = {
	profile_provide,
	NULL,
	profile_enable,
	profile_disable,
	NULL,
	NULL,
	NULL,
	NULL,
	profile_usermode,
	profile_destroy
};
439 
440 static int
441 profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
442 {
443 	switch (cmd) {
444 	case DDI_ATTACH:
445 		break;
446 	case DDI_RESUME:
447 		return (DDI_SUCCESS);
448 	default:
449 		return (DDI_FAILURE);
450 	}
451 
452 	if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0,
453 	    DDI_PSEUDO, NULL) == DDI_FAILURE ||
454 	    dtrace_register("profile", &profile_attr,
455 	    DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, NULL,
456 	    &profile_pops, NULL, &profile_id) != 0) {
457 		ddi_remove_minor_node(devi, NULL);
458 		return (DDI_FAILURE);
459 	}
460 
461 	profile_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
462 	    "profile-max-probes", PROFILE_MAX_DEFAULT);
463 
464 	ddi_report_dev(devi);
465 	profile_devi = devi;
466 	return (DDI_SUCCESS);
467 }
468 
469 static int
470 profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
471 {
472 	switch (cmd) {
473 	case DDI_DETACH:
474 		break;
475 	case DDI_SUSPEND:
476 		return (DDI_SUCCESS);
477 	default:
478 		return (DDI_FAILURE);
479 	}
480 
481 	if (dtrace_unregister(profile_id) != 0)
482 		return (DDI_FAILURE);
483 
484 	ddi_remove_minor_node(devi, NULL);
485 	return (DDI_SUCCESS);
486 }
487 
488 /*ARGSUSED*/
489 static int
490 profile_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
491 {
492 	int error;
493 
494 	switch (infocmd) {
495 	case DDI_INFO_DEVT2DEVINFO:
496 		*result = (void *)profile_devi;
497 		error = DDI_SUCCESS;
498 		break;
499 	case DDI_INFO_DEVT2INSTANCE:
500 		*result = (void *)0;
501 		error = DDI_SUCCESS;
502 		break;
503 	default:
504 		error = DDI_FAILURE;
505 	}
506 	return (error);
507 }
508 
509 /*ARGSUSED*/
510 static int
511 profile_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
512 {
513 	return (0);
514 }
515 
/*
 * Character/block entry points.  Only open is implemented by this file
 * (profile_open()); the remaining slots are filled with the nodev, nulldev,
 * nochpoll and ddi_prop_op routines.
 */
static struct cb_ops profile_cb_ops = {
	profile_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
533 
/*
 * Device operations.  This file supplies getinfo, attach and detach
 * (profile_info(), profile_attach() and profile_detach()) and points at
 * profile_cb_ops for the driver entry points.
 */
static struct dev_ops profile_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	profile_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	profile_attach,		/* attach */
	profile_detach,		/* detach */
	nodev,			/* reset */
	&profile_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};
547 
548 /*
549  * Module linkage information for the kernel.
550  */
/* Driver linkage:  ties the module name to profile_ops. */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Profile Interrupt Tracing",	/* name of module */
	&profile_ops,		/* driver ops */
};

/* Passed to mod_install(), mod_info() and mod_remove() below. */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
562 
563 int
564 _init(void)
565 {
566 	return (mod_install(&modlinkage));
567 }
568 
569 int
570 _info(struct modinfo *modinfop)
571 {
572 	return (mod_info(&modlinkage, modinfop));
573 }
574 
575 int
576 _fini(void)
577 {
578 	return (mod_remove(&modlinkage));
579 }
580