xref: /freebsd/sys/cddl/dev/fbt/fbt.c (revision 148a8da8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22  *
23  * $FreeBSD$
24  *
25  */
26 
27 /*
28  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29  * Use is subject to license terms.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/conf.h>
36 #include <sys/cpuvar.h>
37 #include <sys/fcntl.h>
38 #include <sys/filio.h>
39 #include <sys/kdb.h>
40 #include <sys/kernel.h>
41 #include <sys/kmem.h>
42 #include <sys/kthread.h>
43 #include <sys/limits.h>
44 #include <sys/linker.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/module.h>
48 #include <sys/mutex.h>
49 #include <sys/pcpu.h>
50 #include <sys/poll.h>
51 #include <sys/proc.h>
52 #include <sys/selinfo.h>
53 #include <sys/smp.h>
54 #include <sys/syscall.h>
55 #include <sys/sysent.h>
56 #include <sys/sysproto.h>
57 #include <sys/uio.h>
58 #include <sys/unistd.h>
59 #include <machine/stdarg.h>
60 
61 #include <sys/dtrace.h>
62 #include <sys/dtrace_bsd.h>
63 
64 #include "fbt.h"
65 
/* Malloc type tag used for every M_FBT allocation made in this file. */
MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");

/* Provider id; filled in by dtrace_register() in fbt_load(). */
dtrace_provider_id_t	fbt_id;
/* Probe hash table; allocated (zeroed) in fbt_load(). */
fbt_probe_t		**fbt_probetab;
/* Set to fbt_probetab_size - 1 in fbt_load(). */
int			fbt_probetab_mask;
71 
/*
 * Forward declarations: the device open entry point, the load/unload
 * hooks and the DTrace provider operations referenced by fbt_pops.
 */
static d_open_t	fbt_open;
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);
82 
/*
 * Character device switch for the device node created in fbt_load().
 * Only the open entry point is supplied.
 */
static struct cdevsw fbt_cdevsw = {
	.d_version	= D_VERSION,
	.d_open		= fbt_open,
	.d_name		= "fbt",
};
88 
/*
 * Attribute table passed to dtrace_register() in fbt_load().  Each row
 * holds two stability levels followed by a class.
 * NOTE(review): the meaning of each row is defined by dtrace_pattr_t,
 * which is not visible here -- confirm against <sys/dtrace.h>.
 */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
96 
/*
 * Provider operations vector passed to dtrace_register() in fbt_load().
 * Entries set to NULL are operations this provider does not supply.
 */
static dtrace_pops_t fbt_pops = {
	.dtps_provide =		NULL,
	.dtps_provide_module =	fbt_provide_module,
	.dtps_enable =		fbt_enable,
	.dtps_disable =		fbt_disable,
	.dtps_suspend =		fbt_suspend,
	.dtps_resume =		fbt_resume,
	.dtps_getargdesc =	fbt_getargdesc,
	.dtps_getargval =	NULL,
	.dtps_usermode =	NULL,
	.dtps_destroy =		fbt_destroy
};
109 
/* Device node returned by make_dev() in fbt_load(). */
static struct cdev		*fbt_cdev;
/* Number of fbt_probetab buckets; defaulted in fbt_load() when still zero. */
static int			fbt_probetab_size;
/* When non-zero, fbt_enable() logs probes it declines to enable. */
static int			fbt_verbose = 0;
113 
/*
 * Report whether the function called 'name' must be kept out of FBT
 * instrumentation.
 *
 * Returns 1 when the function is excluded and 0 when it may be traced.
 */
int
fbt_excluded(const char *name)
{
	/* Lock owner methods may be called from probe context. */
	static const char *const lock_owner_methods[] = {
		"owner_mtx",
		"owner_rm",
		"owner_rw",
		"owner_sx"
	};
	size_t idx;

	/*
	 * Functions named "dtrace_*" may run in probe context.  The only
	 * exception is the "dtrace_safe_" prefix, which marks a function
	 * as never being called from probe context.
	 */
	if (strncmp(name, "dtrace_", 7) == 0) {
		if (strncmp(name, "dtrace_safe_", 12) != 0)
			return (1);
	}

	for (idx = 0;
	    idx < sizeof(lock_owner_methods) / sizeof(lock_owner_methods[0]);
	    idx++) {
		if (strcmp(name, lock_owner_methods[idx]) == 0)
			return (1);
	}

#ifndef _KLD_MODULE
	/*
	 * With DTrace compiled into the kernel the FBT functions
	 * themselves must not be instrumented.
	 */
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}
149 
150 static void
151 fbt_doubletrap(void)
152 {
153 	fbt_probe_t *fbt;
154 	int i;
155 
156 	for (i = 0; i < fbt_probetab_size; i++) {
157 		fbt = fbt_probetab[i];
158 
159 		for (; fbt != NULL; fbt = fbt->fbtp_probenext)
160 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
161 	}
162 }
163 
164 static void
165 fbt_provide_module(void *arg, modctl_t *lf)
166 {
167 	char modname[MAXPATHLEN];
168 	int i;
169 	size_t len;
170 
171 	strlcpy(modname, lf->filename, sizeof(modname));
172 	len = strlen(modname);
173 	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
174 		modname[len - 3] = '\0';
175 
176 	/*
177 	 * Employees of dtrace and their families are ineligible.  Void
178 	 * where prohibited.
179 	 */
180 	if (strcmp(modname, "dtrace") == 0)
181 		return;
182 
183 	/*
184 	 * To register with DTrace, a module must list 'dtrace' as a
185 	 * dependency in order for the kernel linker to resolve
186 	 * symbols like dtrace_register(). All modules with such a
187 	 * dependency are ineligible for FBT tracing.
188 	 */
189 	for (i = 0; i < lf->ndeps; i++)
190 		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
191 			return;
192 
193 	if (lf->fbt_nentries) {
194 		/*
195 		 * This module has some FBT entries allocated; we're afraid
196 		 * to screw with it.
197 		 */
198 		return;
199 	}
200 
201 	/*
202 	 * List the functions in the module and the symbol values.
203 	 */
204 	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
205 }
206 
/*
 * Unlink a single probe from fbt_probetab and free it.
 *
 * The bucket is chosen with FBT_ADDR2NDX() on the probe's patch point.
 * Bucket entries are chained through fbtp_hashnext; probes that share a
 * patch point with a bucket entry hang off it through fbtp_tracenext.
 * Panics if the probe is not found in its bucket.
 */
static void
fbt_destroy_one(fbt_probe_t *fbt)
{
	fbt_probe_t *hash, *hashprev, *next;
	int ndx;

	ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
	for (hash = fbt_probetab[ndx], hashprev = NULL; hash != NULL;
	    hashprev = hash, hash = hash->fbtp_hashnext) {
		if (hash == fbt) {
			/*
			 * The probe is itself a bucket entry.  If it has an
			 * fbtp_tracenext successor, that successor inherits
			 * its place in the hash chain; otherwise the chain
			 * simply skips over it.
			 */
			if ((next = fbt->fbtp_tracenext) != NULL)
				next->fbtp_hashnext = hash->fbtp_hashnext;
			else
				next = hash->fbtp_hashnext;
			/* Re-link the predecessor, or the bucket head. */
			if (hashprev != NULL)
				hashprev->fbtp_hashnext = next;
			else
				fbt_probetab[ndx] = next;
			goto free;
		} else if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
			/*
			 * Same patch point but a different bucket entry:
			 * look for the probe on this entry's fbtp_tracenext
			 * list and splice it out.
			 */
			for (next = hash; next->fbtp_tracenext != NULL;
			    next = next->fbtp_tracenext) {
				if (fbt == next->fbtp_tracenext) {
					next->fbtp_tracenext =
					    fbt->fbtp_tracenext;
					goto free;
				}
			}
		}
	}
	panic("probe %p not found in hash table", fbt);
free:
	free(fbt, M_FBT);
}
241 
242 static void
243 fbt_destroy(void *arg, dtrace_id_t id, void *parg)
244 {
245 	fbt_probe_t *fbt = parg, *next;
246 	modctl_t *ctl;
247 
248 	do {
249 		ctl = fbt->fbtp_ctl;
250 		ctl->fbt_nentries--;
251 
252 		next = fbt->fbtp_probenext;
253 		fbt_destroy_one(fbt);
254 		fbt = next;
255 	} while (fbt != NULL);
256 }
257 
258 static void
259 fbt_enable(void *arg, dtrace_id_t id, void *parg)
260 {
261 	fbt_probe_t *fbt = parg;
262 	modctl_t *ctl = fbt->fbtp_ctl;
263 
264 	ctl->nenabled++;
265 
266 	/*
267 	 * Now check that our modctl has the expected load count.  If it
268 	 * doesn't, this module must have been unloaded and reloaded -- and
269 	 * we're not going to touch it.
270 	 */
271 	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
272 		if (fbt_verbose) {
273 			printf("fbt is failing for probe %s "
274 			    "(module %s reloaded)",
275 			    fbt->fbtp_name, ctl->filename);
276 		}
277 
278 		return;
279 	}
280 
281 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
282 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
283 		fbt->fbtp_enabled++;
284 	}
285 }
286 
287 static void
288 fbt_disable(void *arg, dtrace_id_t id, void *parg)
289 {
290 	fbt_probe_t *fbt = parg, *hash;
291 	modctl_t *ctl = fbt->fbtp_ctl;
292 
293 	ASSERT(ctl->nenabled > 0);
294 	ctl->nenabled--;
295 
296 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
297 		return;
298 
299 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
300 		fbt->fbtp_enabled--;
301 
302 		for (hash = fbt_probetab[FBT_ADDR2NDX(fbt->fbtp_patchpoint)];
303 		    hash != NULL; hash = hash->fbtp_hashnext) {
304 			if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
305 				for (; hash != NULL; hash = hash->fbtp_tracenext)
306 					if (hash->fbtp_enabled > 0)
307 						break;
308 				break;
309 			}
310 		}
311 		if (hash == NULL)
312 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
313 	}
314 }
315 
316 static void
317 fbt_suspend(void *arg, dtrace_id_t id, void *parg)
318 {
319 	fbt_probe_t *fbt = parg;
320 	modctl_t *ctl = fbt->fbtp_ctl;
321 
322 	ASSERT(ctl->nenabled > 0);
323 
324 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
325 		return;
326 
327 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
328 		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
329 }
330 
331 static void
332 fbt_resume(void *arg, dtrace_id_t id, void *parg)
333 {
334 	fbt_probe_t *fbt = parg;
335 	modctl_t *ctl = fbt->fbtp_ctl;
336 
337 	ASSERT(ctl->nenabled > 0);
338 
339 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
340 		return;
341 
342 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
343 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
344 }
345 
346 static int
347 fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
348 {
349 	const Elf_Sym *symp = lc->symtab;;
350 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
351 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
352 	int i;
353 	uint32_t *ctfoff;
354 	uint32_t objtoff = hp->cth_objtoff;
355 	uint32_t funcoff = hp->cth_funcoff;
356 	ushort_t info;
357 	ushort_t vlen;
358 
359 	/* Sanity check. */
360 	if (hp->cth_magic != CTF_MAGIC) {
361 		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
362 		return (EINVAL);
363 	}
364 
365 	if (lc->symtab == NULL) {
366 		printf("No symbol table in '%s'\n",lf->pathname);
367 		return (EINVAL);
368 	}
369 
370 	ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK);
371 	*lc->ctfoffp = ctfoff;
372 
373 	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
374 		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
375 			*ctfoff = 0xffffffff;
376 			continue;
377 		}
378 
379 		switch (ELF_ST_TYPE(symp->st_info)) {
380 		case STT_OBJECT:
381 			if (objtoff >= hp->cth_funcoff ||
382                             (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
383 				*ctfoff = 0xffffffff;
384                                 break;
385                         }
386 
387                         *ctfoff = objtoff;
388                         objtoff += sizeof (ushort_t);
389 			break;
390 
391 		case STT_FUNC:
392 			if (funcoff >= hp->cth_typeoff) {
393 				*ctfoff = 0xffffffff;
394 				break;
395 			}
396 
397 			*ctfoff = funcoff;
398 
399 			info = *((const ushort_t *)(ctfdata + funcoff));
400 			vlen = CTF_INFO_VLEN(info);
401 
402 			/*
403 			 * If we encounter a zero pad at the end, just skip it.
404 			 * Otherwise skip over the function and its return type
405 			 * (+2) and the argument list (vlen).
406 			 */
407 			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
408 				funcoff += sizeof (ushort_t); /* skip pad */
409 			else
410 				funcoff += sizeof (ushort_t) * (vlen + 2);
411 			break;
412 
413 		default:
414 			*ctfoff = 0xffffffff;
415 			break;
416 		}
417 	}
418 
419 	return (0);
420 }
421 
422 static ssize_t
423 fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
424     ssize_t *incrementp)
425 {
426 	ssize_t size, increment;
427 
428 	if (version > CTF_VERSION_1 &&
429 	    tp->ctt_size == CTF_LSIZE_SENT) {
430 		size = CTF_TYPE_LSIZE(tp);
431 		increment = sizeof (ctf_type_t);
432 	} else {
433 		size = tp->ctt_size;
434 		increment = sizeof (ctf_stype_t);
435 	}
436 
437 	if (sizep)
438 		*sizep = size;
439 	if (incrementp)
440 		*incrementp = increment;
441 
442 	return (size);
443 }
444 
445 static int
446 fbt_typoff_init(linker_ctf_t *lc)
447 {
448 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
449 	const ctf_type_t *tbuf;
450 	const ctf_type_t *tend;
451 	const ctf_type_t *tp;
452 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
453 	int ctf_typemax = 0;
454 	uint32_t *xp;
455 	ulong_t pop[CTF_K_MAX + 1] = { 0 };
456 
457 
458 	/* Sanity check. */
459 	if (hp->cth_magic != CTF_MAGIC)
460 		return (EINVAL);
461 
462 	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
463 	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
464 
465 	int child = hp->cth_parname != 0;
466 
467 	/*
468 	 * We make two passes through the entire type section.  In this first
469 	 * pass, we count the number of each type and the total number of types.
470 	 */
471 	for (tp = tbuf; tp < tend; ctf_typemax++) {
472 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
473 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
474 		ssize_t size, increment;
475 
476 		size_t vbytes;
477 		uint_t n;
478 
479 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
480 
481 		switch (kind) {
482 		case CTF_K_INTEGER:
483 		case CTF_K_FLOAT:
484 			vbytes = sizeof (uint_t);
485 			break;
486 		case CTF_K_ARRAY:
487 			vbytes = sizeof (ctf_array_t);
488 			break;
489 		case CTF_K_FUNCTION:
490 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
491 			break;
492 		case CTF_K_STRUCT:
493 		case CTF_K_UNION:
494 			if (size < CTF_LSTRUCT_THRESH) {
495 				ctf_member_t *mp = (ctf_member_t *)
496 				    ((uintptr_t)tp + increment);
497 
498 				vbytes = sizeof (ctf_member_t) * vlen;
499 				for (n = vlen; n != 0; n--, mp++)
500 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
501 			} else {
502 				ctf_lmember_t *lmp = (ctf_lmember_t *)
503 				    ((uintptr_t)tp + increment);
504 
505 				vbytes = sizeof (ctf_lmember_t) * vlen;
506 				for (n = vlen; n != 0; n--, lmp++)
507 					child |=
508 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
509 			}
510 			break;
511 		case CTF_K_ENUM:
512 			vbytes = sizeof (ctf_enum_t) * vlen;
513 			break;
514 		case CTF_K_FORWARD:
515 			/*
516 			 * For forward declarations, ctt_type is the CTF_K_*
517 			 * kind for the tag, so bump that population count too.
518 			 * If ctt_type is unknown, treat the tag as a struct.
519 			 */
520 			if (tp->ctt_type == CTF_K_UNKNOWN ||
521 			    tp->ctt_type >= CTF_K_MAX)
522 				pop[CTF_K_STRUCT]++;
523 			else
524 				pop[tp->ctt_type]++;
525 			/*FALLTHRU*/
526 		case CTF_K_UNKNOWN:
527 			vbytes = 0;
528 			break;
529 		case CTF_K_POINTER:
530 		case CTF_K_TYPEDEF:
531 		case CTF_K_VOLATILE:
532 		case CTF_K_CONST:
533 		case CTF_K_RESTRICT:
534 			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
535 			vbytes = 0;
536 			break;
537 		default:
538 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
539 			return (EIO);
540 		}
541 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
542 		pop[kind]++;
543 	}
544 
545 	/* account for a sentinel value below */
546 	ctf_typemax++;
547 	*lc->typlenp = ctf_typemax;
548 
549 	xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER,
550 	    M_ZERO | M_WAITOK);
551 
552 	*lc->typoffp = xp;
553 
554 	/* type id 0 is used as a sentinel value */
555 	*xp++ = 0;
556 
557 	/*
558 	 * In the second pass, fill in the type offset.
559 	 */
560 	for (tp = tbuf; tp < tend; xp++) {
561 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
562 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
563 		ssize_t size, increment;
564 
565 		size_t vbytes;
566 		uint_t n;
567 
568 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
569 
570 		switch (kind) {
571 		case CTF_K_INTEGER:
572 		case CTF_K_FLOAT:
573 			vbytes = sizeof (uint_t);
574 			break;
575 		case CTF_K_ARRAY:
576 			vbytes = sizeof (ctf_array_t);
577 			break;
578 		case CTF_K_FUNCTION:
579 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
580 			break;
581 		case CTF_K_STRUCT:
582 		case CTF_K_UNION:
583 			if (size < CTF_LSTRUCT_THRESH) {
584 				ctf_member_t *mp = (ctf_member_t *)
585 				    ((uintptr_t)tp + increment);
586 
587 				vbytes = sizeof (ctf_member_t) * vlen;
588 				for (n = vlen; n != 0; n--, mp++)
589 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
590 			} else {
591 				ctf_lmember_t *lmp = (ctf_lmember_t *)
592 				    ((uintptr_t)tp + increment);
593 
594 				vbytes = sizeof (ctf_lmember_t) * vlen;
595 				for (n = vlen; n != 0; n--, lmp++)
596 					child |=
597 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
598 			}
599 			break;
600 		case CTF_K_ENUM:
601 			vbytes = sizeof (ctf_enum_t) * vlen;
602 			break;
603 		case CTF_K_FORWARD:
604 		case CTF_K_UNKNOWN:
605 			vbytes = 0;
606 			break;
607 		case CTF_K_POINTER:
608 		case CTF_K_TYPEDEF:
609 		case CTF_K_VOLATILE:
610 		case CTF_K_CONST:
611 		case CTF_K_RESTRICT:
612 			vbytes = 0;
613 			break;
614 		default:
615 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
616 			return (EIO);
617 		}
618 		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
619 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
620 	}
621 
622 	return (0);
623 }
624 
625 /*
626  * CTF Declaration Stack
627  *
628  * In order to implement ctf_type_name(), we must convert a type graph back
629  * into a C type declaration.  Unfortunately, a type graph represents a storage
630  * class ordering of the type whereas a type declaration must obey the C rules
631  * for operator precedence, and the two orderings are frequently in conflict.
632  * For example, consider these CTF type graphs and their C declarations:
633  *
634  * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
635  * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
636  *
637  * In each case, parentheses are used to raise operator * to higher lexical
638  * precedence, so the string form of the C declaration cannot be constructed by
639  * walking the type graph links and forming the string from left to right.
640  *
641  * The functions in this file build a set of stacks from the type graph nodes
642  * corresponding to the C operator precedence levels in the appropriate order.
643  * The code in ctf_type_name() can then iterate over the levels and nodes in
644  * lexical precedence order and construct the final C declaration string.
645  */
/*
 * Link pair used both as a list head (l_prev is the tail, l_next is the
 * head) and as the linkage embedded in each list element.
 */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;

/* Fetch the l_prev / l_next link of 'elem', viewed as a ctf_list_t. */
#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))
653 
/*
 * Precedence levels of a C declaration.  The values index the cd_nodes
 * and cd_order arrays of ctf_decl_t; CTF_PREC_MAX is the number of levels.
 */
typedef enum {
	CTF_PREC_BASE,
	CTF_PREC_POINTER,
	CTF_PREC_ARRAY,
	CTF_PREC_FUNCTION,
	CTF_PREC_MAX
} ctf_decl_prec_t;
661 
/*
 * One entry on a declaration stack; allocated by ctf_decl_push() and
 * released by ctf_decl_fini().  cd_list must stay the first member so
 * the node can be handled through the ctf_list_* routines.
 */
typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind */
	uint_t cd_n;				/* type dimension if array */
} ctf_decl_node_t;
668 
/*
 * State used while converting a type graph into a C declaration string:
 * one node list per precedence level plus the output buffer bookkeeping.
 */
typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* precedence level given to qualifiers */
	ctf_decl_prec_t cd_ordp;		/* next storage order value to assign */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* buffer location */
	char *cd_end;				/* buffer limit */
	size_t cd_len;				/* buffer space required */
	int cd_err;				/* saved error value */
} ctf_decl_t;
680 
681 /*
682  * Simple doubly-linked list append routine.  This implementation assumes that
683  * each list element contains an embedded ctf_list_t as the first member.
684  * An additional ctf_list_t is used to store the head (l_next) and tail
685  * (l_prev) pointers.  The current head and tail list elements have their
686  * previous and next pointers set to NULL, respectively.
687  */
688 static void
689 ctf_list_append(ctf_list_t *lp, void *new)
690 {
691 	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
692 	ctf_list_t *q = new;		/* q = new list element */
693 
694 	lp->l_prev = q;
695 	q->l_prev = p;
696 	q->l_next = NULL;
697 
698 	if (p != NULL)
699 		p->l_next = q;
700 	else
701 		lp->l_next = q;
702 }
703 
704 /*
705  * Prepend the specified existing element to the given ctf_list_t.  The
706  * existing pointer should be pointing at a struct with embedded ctf_list_t.
707  */
708 static void
709 ctf_list_prepend(ctf_list_t *lp, void *new)
710 {
711 	ctf_list_t *p = new;		/* p = new list element */
712 	ctf_list_t *q = lp->l_next;	/* q = head list element */
713 
714 	lp->l_next = p;
715 	p->l_prev = NULL;
716 	p->l_next = q;
717 
718 	if (q != NULL)
719 		q->l_prev = p;
720 	else
721 		lp->l_prev = p;
722 }
723 
724 static void
725 ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
726 {
727 	int i;
728 
729 	bzero(cd, sizeof (ctf_decl_t));
730 
731 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
732 		cd->cd_order[i] = CTF_PREC_BASE - 1;
733 
734 	cd->cd_qualp = CTF_PREC_BASE;
735 	cd->cd_ordp = CTF_PREC_BASE;
736 
737 	cd->cd_buf = buf;
738 	cd->cd_ptr = buf;
739 	cd->cd_end = buf + len;
740 }
741 
742 static void
743 ctf_decl_fini(ctf_decl_t *cd)
744 {
745 	ctf_decl_node_t *cdp, *ndp;
746 	int i;
747 
748 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
749 		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
750 		    cdp != NULL; cdp = ndp) {
751 			ndp = ctf_list_next(cdp);
752 			free(cdp, M_FBT);
753 		}
754 	}
755 }
756 
757 static const ctf_type_t *
758 ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
759 {
760 	const ctf_type_t *tp;
761 	uint32_t offset;
762 	uint32_t *typoff = *lc->typoffp;
763 
764 	if (type >= *lc->typlenp) {
765 		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
766 		return(NULL);
767 	}
768 
769 	/* Check if the type isn't cross-referenced. */
770 	if ((offset = typoff[type]) == 0) {
771 		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
772 		return(NULL);
773 	}
774 
775 	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
776 
777 	return (tp);
778 }
779 
780 static void
781 fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
782 {
783 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
784 	const ctf_type_t *tp;
785 	const ctf_array_t *ap;
786 	ssize_t increment;
787 
788 	bzero(arp, sizeof(*arp));
789 
790 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
791 		return;
792 
793 	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
794 		return;
795 
796 	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
797 
798 	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
799 	arp->ctr_contents = ap->cta_contents;
800 	arp->ctr_index = ap->cta_index;
801 	arp->ctr_nelems = ap->cta_nelems;
802 }
803 
804 static const char *
805 ctf_strptr(linker_ctf_t *lc, int name)
806 {
807 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;;
808 	const char *strp = "";
809 
810 	if (name < 0 || name >= hp->cth_strlen)
811 		return(strp);
812 
813 	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
814 
815 	return (strp);
816 }
817 
/*
 * Push the declaration node(s) for 'type' onto the stacks in 'cd'.
 *
 * The type a node refers to (array contents, pointer/function/qualifier
 * target) is pushed recursively before the node itself.  A typedef with
 * an empty name is skipped in favour of the type it refers to.  When the
 * type cannot be looked up, cd_err is set to ENOENT and nothing is pushed
 * for it.
 */
static void
ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
{
	ctf_decl_node_t *cdp;
	ctf_decl_prec_t prec;
	uint_t kind, n = 1;
	int is_qual = 0;

	const ctf_type_t *tp;
	ctf_arinfo_t ar;

	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
		cd->cd_err = ENOENT;
		return;
	}

	/* Choose the precedence level and push the referenced type first. */
	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
	case CTF_K_ARRAY:
		fbt_array_info(lc, type, &ar);
		ctf_decl_push(cd, lc, ar.ctr_contents);
		n = ar.ctr_nelems;
		prec = CTF_PREC_ARRAY;
		break;

	case CTF_K_TYPEDEF:
		/* Unnamed typedef: push only the type it refers to. */
		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
			ctf_decl_push(cd, lc, tp->ctt_type);
			return;
		}
		prec = CTF_PREC_BASE;
		break;

	case CTF_K_FUNCTION:
		ctf_decl_push(cd, lc, tp->ctt_type);
		prec = CTF_PREC_FUNCTION;
		break;

	case CTF_K_POINTER:
		ctf_decl_push(cd, lc, tp->ctt_type);
		prec = CTF_PREC_POINTER;
		break;

	case CTF_K_VOLATILE:
	case CTF_K_CONST:
	case CTF_K_RESTRICT:
		/* Qualifiers take the level recorded in cd_qualp. */
		ctf_decl_push(cd, lc, tp->ctt_type);
		prec = cd->cd_qualp;
		is_qual++;
		break;

	default:
		prec = CTF_PREC_BASE;
	}

	cdp = malloc(sizeof(*cdp), M_FBT, M_WAITOK);
	cdp->cd_type = type;
	cdp->cd_kind = kind;
	cdp->cd_n = n;

	/* First node at this level: record the order in which it was reached. */
	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
		cd->cd_order[prec] = cd->cd_ordp++;

	/*
	 * Reset cd_qualp to the highest precedence level that we've seen so
	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
	 */
	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
		cd->cd_qualp = prec;

	/*
	 * C array declarators are ordered inside out so prepend them.  Also by
	 * convention qualifiers of base types precede the type specifier (e.g.
	 * const int vs. int const) even though the two forms are equivalent.
	 */
	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
	else
		ctf_list_append(&cd->cd_nodes[prec], cdp);
}
897 
898 static void
899 ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
900 {
901 	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
902 	va_list ap;
903 	size_t n;
904 
905 	va_start(ap, format);
906 	n = vsnprintf(cd->cd_ptr, len, format, ap);
907 	va_end(ap);
908 
909 	cd->cd_ptr += MIN(n, len);
910 	cd->cd_len += n;
911 }
912 
913 static ssize_t
914 fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
915 {
916 	ctf_decl_t cd;
917 	ctf_decl_node_t *cdp;
918 	ctf_decl_prec_t prec, lp, rp;
919 	int ptr, arr;
920 	uint_t k;
921 
922 	if (lc == NULL && type == CTF_ERR)
923 		return (-1); /* simplify caller code by permitting CTF_ERR */
924 
925 	ctf_decl_init(&cd, buf, len);
926 	ctf_decl_push(&cd, lc, type);
927 
928 	if (cd.cd_err != 0) {
929 		ctf_decl_fini(&cd);
930 		return (-1);
931 	}
932 
933 	/*
934 	 * If the type graph's order conflicts with lexical precedence order
935 	 * for pointers or arrays, then we need to surround the declarations at
936 	 * the corresponding lexical precedence with parentheses.  This can
937 	 * result in either a parenthesized pointer (*) as in int (*)() or
938 	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
939 	 */
940 	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
941 	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
942 
943 	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
944 	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
945 
946 	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
947 
948 	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
949 		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
950 		    cdp != NULL; cdp = ctf_list_next(cdp)) {
951 
952 			const ctf_type_t *tp =
953 			    ctf_lookup_by_id(lc, cdp->cd_type);
954 			const char *name = ctf_strptr(lc, tp->ctt_name);
955 
956 			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
957 				ctf_decl_sprintf(&cd, " ");
958 
959 			if (lp == prec) {
960 				ctf_decl_sprintf(&cd, "(");
961 				lp = -1;
962 			}
963 
964 			switch (cdp->cd_kind) {
965 			case CTF_K_INTEGER:
966 			case CTF_K_FLOAT:
967 			case CTF_K_TYPEDEF:
968 				ctf_decl_sprintf(&cd, "%s", name);
969 				break;
970 			case CTF_K_POINTER:
971 				ctf_decl_sprintf(&cd, "*");
972 				break;
973 			case CTF_K_ARRAY:
974 				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
975 				break;
976 			case CTF_K_FUNCTION:
977 				ctf_decl_sprintf(&cd, "()");
978 				break;
979 			case CTF_K_STRUCT:
980 			case CTF_K_FORWARD:
981 				ctf_decl_sprintf(&cd, "struct %s", name);
982 				break;
983 			case CTF_K_UNION:
984 				ctf_decl_sprintf(&cd, "union %s", name);
985 				break;
986 			case CTF_K_ENUM:
987 				ctf_decl_sprintf(&cd, "enum %s", name);
988 				break;
989 			case CTF_K_VOLATILE:
990 				ctf_decl_sprintf(&cd, "volatile");
991 				break;
992 			case CTF_K_CONST:
993 				ctf_decl_sprintf(&cd, "const");
994 				break;
995 			case CTF_K_RESTRICT:
996 				ctf_decl_sprintf(&cd, "restrict");
997 				break;
998 			}
999 
1000 			k = cdp->cd_kind;
1001 		}
1002 
1003 		if (rp == prec)
1004 			ctf_decl_sprintf(&cd, ")");
1005 	}
1006 
1007 	ctf_decl_fini(&cd);
1008 	return (cd.cd_len);
1009 }
1010 
1011 static void
1012 fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1013 {
1014 	const ushort_t *dp;
1015 	fbt_probe_t *fbt = parg;
1016 	linker_ctf_t lc;
1017 	modctl_t *ctl = fbt->fbtp_ctl;
1018 	int ndx = desc->dtargd_ndx;
1019 	int symindx = fbt->fbtp_symindx;
1020 	uint32_t *ctfoff;
1021 	uint32_t offset;
1022 	ushort_t info, kind, n;
1023 
1024 	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
1025 		(void) strcpy(desc->dtargd_native, "int");
1026 		return;
1027 	}
1028 
1029 	desc->dtargd_ndx = DTRACE_ARGNONE;
1030 
1031 	/* Get a pointer to the CTF data and it's length. */
1032 	if (linker_ctf_get(ctl, &lc) != 0)
1033 		/* No CTF data? Something wrong? *shrug* */
1034 		return;
1035 
1036 	/* Check if this module hasn't been initialised yet. */
1037 	if (*lc.ctfoffp == NULL) {
1038 		/*
1039 		 * Initialise the CTF object and function symindx to
1040 		 * byte offset array.
1041 		 */
1042 		if (fbt_ctfoff_init(ctl, &lc) != 0)
1043 			return;
1044 
1045 		/* Initialise the CTF type to byte offset array. */
1046 		if (fbt_typoff_init(&lc) != 0)
1047 			return;
1048 	}
1049 
1050 	ctfoff = *lc.ctfoffp;
1051 
1052 	if (ctfoff == NULL || *lc.typoffp == NULL)
1053 		return;
1054 
1055 	/* Check if the symbol index is out of range. */
1056 	if (symindx >= lc.nsym)
1057 		return;
1058 
1059 	/* Check if the symbol isn't cross-referenced. */
1060 	if ((offset = ctfoff[symindx]) == 0xffffffff)
1061 		return;
1062 
1063 	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1064 
1065 	info = *dp++;
1066 	kind = CTF_INFO_KIND(info);
1067 	n = CTF_INFO_VLEN(info);
1068 
1069 	if (kind == CTF_K_UNKNOWN && n == 0) {
1070 		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1071 		return;
1072 	}
1073 
1074 	if (kind != CTF_K_FUNCTION) {
1075 		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1076 		return;
1077 	}
1078 
1079 	if (fbt->fbtp_roffset != 0) {
1080 		/* Only return type is available for args[1] in return probe. */
1081 		if (ndx > 1)
1082 			return;
1083 		ASSERT(ndx == 1);
1084 	} else {
1085 		/* Check if the requested argument doesn't exist. */
1086 		if (ndx >= n)
1087 			return;
1088 
1089 		/* Skip the return type and arguments up to the one requested. */
1090 		dp += ndx + 1;
1091 	}
1092 
1093 	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1094 		desc->dtargd_ndx = ndx;
1095 
1096 	return;
1097 }
1098 
1099 static int
1100 fbt_linker_file_cb(linker_file_t lf, void *arg)
1101 {
1102 
1103 	fbt_provide_module(arg, lf);
1104 
1105 	return (0);
1106 }
1107 
1108 static void
1109 fbt_load(void *dummy)
1110 {
1111 	/* Create the /dev/dtrace/fbt entry. */
1112 	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1113 	    "dtrace/fbt");
1114 
1115 	/* Default the probe table size if not specified. */
1116 	if (fbt_probetab_size == 0)
1117 		fbt_probetab_size = FBT_PROBETAB_SIZE;
1118 
1119 	/* Choose the hash mask for the probe table. */
1120 	fbt_probetab_mask = fbt_probetab_size - 1;
1121 
1122 	/* Allocate memory for the probe table. */
1123 	fbt_probetab =
1124 	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1125 
1126 	dtrace_doubletrap_func = fbt_doubletrap;
1127 	dtrace_invop_add(fbt_invop);
1128 
1129 	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1130 	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1131 		return;
1132 
1133 	/* Create probes for the kernel and already-loaded modules. */
1134 	linker_file_foreach(fbt_linker_file_cb, NULL);
1135 }
1136 
1137 static int
1138 fbt_unload()
1139 {
1140 	int error = 0;
1141 
1142 	/* De-register the invalid opcode handler. */
1143 	dtrace_invop_remove(fbt_invop);
1144 
1145 	dtrace_doubletrap_func = NULL;
1146 
1147 	/* De-register this DTrace provider. */
1148 	if ((error = dtrace_unregister(fbt_id)) != 0)
1149 		return (error);
1150 
1151 	/* Free the probe table. */
1152 	free(fbt_probetab, M_FBT);
1153 	fbt_probetab = NULL;
1154 	fbt_probetab_mask = 0;
1155 
1156 	destroy_dev(fbt_cdev);
1157 
1158 	return (error);
1159 }
1160 
1161 static int
1162 fbt_modevent(module_t mod __unused, int type, void *data __unused)
1163 {
1164 	int error = 0;
1165 
1166 	switch (type) {
1167 	case MOD_LOAD:
1168 		break;
1169 
1170 	case MOD_UNLOAD:
1171 		break;
1172 
1173 	case MOD_SHUTDOWN:
1174 		break;
1175 
1176 	default:
1177 		error = EOPNOTSUPP;
1178 		break;
1179 
1180 	}
1181 
1182 	return (error);
1183 }
1184 
1185 static int
1186 fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1187 {
1188 	return (0);
1189 }
1190 
/*
 * fbt_load() and fbt_unload() are hooked in at SI_SUB_DTRACE_PROVIDER
 * rather than being driven from fbt_modevent().
 */
SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);

/* Module declaration, version, and dependencies on dtrace and opensolaris. */
DEV_MODULE(fbt, fbt_modevent, NULL);
MODULE_VERSION(fbt, 1);
MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1198