xref: /openbsd/sys/dev/dt/dt_dev.c (revision 8ce4994e)
1 /*	$OpenBSD: dt_dev.c,v 1.42 2024/12/04 09:37:33 mpi Exp $ */
2 
3 /*
4  * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/systm.h>
21 #include <sys/param.h>
22 #include <sys/clockintr.h>
23 #include <sys/device.h>
24 #include <sys/exec_elf.h>
25 #include <sys/malloc.h>
26 #include <sys/proc.h>
27 #include <sys/ptrace.h>
28 
29 #include <machine/intr.h>
30 
31 #include <dev/dt/dtvar.h>
32 
/*
 * Number of frames to skip in stack traces.
 *
 * The number of frames required to execute dt(4) profiling code
 * depends on the probe, context, architecture and possibly the
 * compiler.
 *
 * Static probes (tracepoints) are executed in the context of the
 * current thread and only need to skip frames up to the recording
 * function.  For example the syscall provider:
 *
 *	dt_prov_syscall_entry+0x141
 *	syscall+0x205		<--- start here
 *	Xsyscall+0x128
 *
 * Probes executed in their own context, like the profile provider,
 * need to skip the frames of that context which are different for
 * every architecture.  For example the profile provider executed
 * from hardclock(9) on amd64:
 *
 *	dt_prov_profile_enter+0x6e
 *	hardclock+0x1a9
 *	lapic_clockintr+0x3f
 *	Xresume_lapic_ltimer+0x26
 *	acpicpu_idle+0x1d2	<---- start here.
 *	sched_idle+0x225
 *	proc_trampoline+0x1c
 */
#if defined(__amd64__)
#define DT_FA_PROFILE	5
#define DT_FA_STATIC	2
#elif defined(__i386__)
#define DT_FA_PROFILE	5
#define DT_FA_STATIC	2
#elif defined(__macppc__)
#define DT_FA_PROFILE  5
#define DT_FA_STATIC   2
#elif defined(__octeon__)
#define DT_FA_PROFILE	6
#define DT_FA_STATIC	2
#elif defined(__powerpc64__)
#define DT_FA_PROFILE	6
#define DT_FA_STATIC	2
#elif defined(__sparc64__)
#define DT_FA_PROFILE	7
#define DT_FA_STATIC	1
#else
/* Unknown architectures skip nothing: traces include tracer frames. */
#define DT_FA_STATIC	0
#define DT_FA_PROFILE	0
#endif

#define DT_EVTRING_SIZE	16	/* # of slots in per PCB event ring */

#define DPRINTF(x...) /* nothing */	/* debug output compiled out */
87 
/*
 * Per-CPU Event States
 *
 * Note the inverted index naming: the CPU records events at `dc_cons'
 * (see dt_pcb_ring_get()) and the thread in read(2) consumes them,
 * advancing `dc_prod' as it frees slots (see dt_ring_copy()).  The
 * names reflect who produces *free slots*, not events.
 *
 *  Locks used to protect struct members:
 *	r	owned by thread doing read(2)
 *	c	owned by CPU
 *	s	sliced ownership, based on read/write indexes
 *	p	written by CPU, read by thread doing read(2)
 */
struct dt_cpubuf {
	unsigned int		 dc_prod;	/* [r] read index */
	unsigned int		 dc_cons;	/* [c] write index */
	struct dt_evt		*dc_ring;	/* [s] ring of event states */
	unsigned int	 	 dc_inevt;	/* [c] in event already? */

	/* Counters, summed over all CPUs by dt_ioctl_get_stats(). */
	unsigned int		 dc_dropevt;	/* [p] # of events dropped */
	unsigned int		 dc_skiptick;	/* [p] # of ticks skipped */
	unsigned int		 dc_recurevt;	/* [p] # of recursive events */
	unsigned int		 dc_readevt;	/* [r] # of events read */
};
109 
/*
 * Descriptor associated with each program opening /dev/dt.  It is used
 * to keep track of enabled PCBs.
 *
 *  Locks used to protect struct members in this file:
 *	a	atomic
 *	K	kernel lock
 *	r	owned by thread doing read(2)
 *	I	invariant after initialization
 */
struct dt_softc {
	SLIST_ENTRY(dt_softc)	 ds_next;	/* [K] descriptor list */
	int			 ds_unit;	/* [I] D_CLONE unique unit */
	pid_t			 ds_pid;	/* [I] PID of tracing program */
	void			*ds_si;		/* [I] to defer wakeup(9) */

	struct dt_pcb_list	 ds_pcbs;	/* [K] list of enabled PCBs */
	int			 ds_recording;	/* [K] currently recording? */
	unsigned int		 ds_evtcnt;	/* [a] # of readable evts */

	/* Per-CPU event rings; slots up to ncpusfound are populated. */
	struct dt_cpubuf	 ds_cpu[MAXCPUS]; /* [I] Per-cpu event states */
	unsigned int		 ds_lastcpu;	/* [r] last CPU ring read(2). */
};
133 
SLIST_HEAD(, dt_softc) dtdev_list;	/* [K] list of open /dev/dt nodes */

/*
 * Probes are created during dt_attach() and never modified/freed during
 * the lifetime of the system.  That's why we consider them as [I]mmutable.
 */
unsigned int			dt_nprobes;	/* [I] # of probes available */
SIMPLEQ_HEAD(, dt_probe)	dt_probe_list;	/* [I] list of probes */

/* Serializes PCB (un)publication on probes, see dt_ioctl_record_*(). */
struct rwlock			dt_lock = RWLOCK_INITIALIZER("dtlk");
volatile uint32_t		dt_tracing = 0;	/* [K] # of processes tracing */

/* Non-zero allows opening /dev/dt; checked in dtopen(). */
int allowdt;					/* [a] */
147 
148 void	dtattach(struct device *, struct device *, void *);
149 int	dtopen(dev_t, int, int, struct proc *);
150 int	dtclose(dev_t, int, int, struct proc *);
151 int	dtread(dev_t, struct uio *, int);
152 int	dtioctl(dev_t, u_long, caddr_t, int, struct proc *);
153 
154 struct	dt_softc *dtlookup(int);
155 struct	dt_softc *dtalloc(void);
156 void	dtfree(struct dt_softc *);
157 
158 int	dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
159 int	dt_ioctl_get_args(struct dt_softc *, struct dtioc_arg *);
160 int	dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
161 int	dt_ioctl_record_start(struct dt_softc *);
162 void	dt_ioctl_record_stop(struct dt_softc *);
163 int	dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
164 int	dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
165 int	dt_ioctl_get_auxbase(struct dt_softc *, struct dtioc_getaux *);
166 
167 int	dt_ring_copy(struct dt_cpubuf *, struct uio *, size_t, size_t *);
168 
169 void	dt_wakeup(struct dt_softc *);
170 void	dt_deferred_wakeup(void *);
171 
/*
 * Autoconf attachment: initialize the global descriptor and probe
 * lists, then register every probe offered by the built-in providers.
 */
void
dtattach(struct device *parent, struct device *self, void *aux)
{
	SLIST_INIT(&dtdev_list);
	SIMPLEQ_INIT(&dt_probe_list);

	/* Init providers; each returns the number of probes it created. */
	dt_nprobes += dt_prov_profile_init();
	dt_nprobes += dt_prov_syscall_init();
	dt_nprobes += dt_prov_static_init();
#ifdef DDBPROF
	dt_nprobes += dt_prov_kprobe_init();
#endif
}
186 
187 int
dtopen(dev_t dev,int flags,int mode,struct proc * p)188 dtopen(dev_t dev, int flags, int mode, struct proc *p)
189 {
190 	struct dt_softc *sc;
191 	int unit = minor(dev);
192 
193 	if (atomic_load_int(&allowdt) == 0)
194 		return EPERM;
195 
196 	sc = dtalloc();
197 	if (sc == NULL)
198 		return ENOMEM;
199 
200 	/* no sleep after this point */
201 	if (dtlookup(unit) != NULL) {
202 		dtfree(sc);
203 		return EBUSY;
204 	}
205 
206 	sc->ds_unit = unit;
207 	sc->ds_pid = p->p_p->ps_pid;
208 	TAILQ_INIT(&sc->ds_pcbs);
209 	sc->ds_lastcpu = 0;
210 	sc->ds_evtcnt = 0;
211 
212 	SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);
213 
214 	DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);
215 
216 	return 0;
217 }
218 
219 int
dtclose(dev_t dev,int flags,int mode,struct proc * p)220 dtclose(dev_t dev, int flags, int mode, struct proc *p)
221 {
222 	struct dt_softc *sc;
223 	int unit = minor(dev);
224 
225 	sc = dtlookup(unit);
226 	KASSERT(sc != NULL);
227 
228 	DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);
229 
230 	SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
231 	dt_ioctl_record_stop(sc);
232 	dt_pcb_purge(&sc->ds_pcbs);
233 	dtfree(sc);
234 
235 	return 0;
236 }
237 
/*
 * read(2) entry point: block until at least one event is recorded,
 * then drain the per-CPU rings round-robin into the user's buffer.
 * Returns with a short count rather than blocking for a full buffer.
 */
int
dtread(dev_t dev, struct uio *uio, int flags)
{
	struct dt_softc *sc;
	struct dt_cpubuf *dc;
	int i, error = 0, unit = minor(dev);
	size_t count, max, read = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	/* How many whole events fit in the user's buffer? */
	max = howmany(uio->uio_resid, sizeof(struct dt_evt));
	if (max < 1)
		return (EMSGSIZE);

	/* Sleep until a CPU publishes at least one readable event. */
	while (!atomic_load_int(&sc->ds_evtcnt)) {
		sleep_setup(sc, PWAIT | PCATCH, "dtread");
		error = sleep_finish(0, !atomic_load_int(&sc->ds_evtcnt));
		if (error == EINTR || error == ERESTART)
			break;
	}
	if (error)
		return error;

	/*
	 * Start after the CPU serviced by the previous read(2) so a
	 * busy low-numbered CPU cannot starve the others.
	 */
	KERNEL_ASSERT_LOCKED();
	for (i = 0; i < ncpusfound; i++) {
		count = 0;
		dc = &sc->ds_cpu[(sc->ds_lastcpu + i) % ncpusfound];
		error = dt_ring_copy(dc, uio, max, &count);
		if (error && count == 0)
			break;

		read += count;
		max -= count;
		if (max == 0)
			break;
	}
	/* NOTE(review): `i % ncpusfound` is 0 when the loop ran to
	 * completion (i == ncpusfound), leaving ds_lastcpu unchanged —
	 * presumably intentional; confirm against upstream history. */
	sc->ds_lastcpu += i % ncpusfound;

	atomic_sub_int(&sc->ds_evtcnt, read);

	return error;
}
281 
282 int
dtioctl(dev_t dev,u_long cmd,caddr_t addr,int flag,struct proc * p)283 dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
284 {
285 	struct dt_softc *sc;
286 	int unit = minor(dev);
287 	int on, error = 0;
288 
289 	sc = dtlookup(unit);
290 	KASSERT(sc != NULL);
291 
292 	switch (cmd) {
293 	case DTIOCGPLIST:
294 		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
295 	case DTIOCGARGS:
296 		return dt_ioctl_get_args(sc, (struct dtioc_arg *)addr);
297 	case DTIOCGSTATS:
298 		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
299 	case DTIOCRECORD:
300 	case DTIOCPRBENABLE:
301 	case DTIOCPRBDISABLE:
302 	case DTIOCGETAUXBASE:
303 		/* root only ioctl(2) */
304 		break;
305 	default:
306 		return ENOTTY;
307 	}
308 
309 	if ((error = suser(p)) != 0)
310 		return error;
311 
312 	switch (cmd) {
313 	case DTIOCRECORD:
314 		on = *(int *)addr;
315 		if (on)
316 			error = dt_ioctl_record_start(sc);
317 		else
318 			dt_ioctl_record_stop(sc);
319 		break;
320 	case DTIOCPRBENABLE:
321 		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
322 		break;
323 	case DTIOCPRBDISABLE:
324 		error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr);
325 		break;
326 	case DTIOCGETAUXBASE:
327 		error = dt_ioctl_get_auxbase(sc, (struct dtioc_getaux *)addr);
328 		break;
329 	default:
330 		KASSERT(0);
331 	}
332 
333 	return error;
334 }
335 
336 struct dt_softc *
dtlookup(int unit)337 dtlookup(int unit)
338 {
339 	struct dt_softc *sc;
340 
341 	KERNEL_ASSERT_LOCKED();
342 
343 	SLIST_FOREACH(sc, &dtdev_list, ds_next) {
344 		if (sc->ds_unit == unit)
345 			break;
346 	}
347 
348 	return sc;
349 }
350 
351 struct dt_softc *
dtalloc(void)352 dtalloc(void)
353 {
354 	struct dt_softc *sc;
355 	struct dt_evt *dtev;
356 	int i;
357 
358 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
359 	if (sc == NULL)
360 		return NULL;
361 
362 	for (i = 0; i < ncpusfound; i++) {
363 		dtev = mallocarray(DT_EVTRING_SIZE, sizeof(*dtev), M_DEVBUF,
364 		    M_WAITOK|M_CANFAIL|M_ZERO);
365 		if (dtev == NULL)
366 			break;
367 		sc->ds_cpu[i].dc_ring = dtev;
368 	}
369 	if (i < ncpusfound) {
370 		dtfree(sc);
371 		return NULL;
372 	}
373 
374 	sc->ds_si = softintr_establish(IPL_SOFTCLOCK, dt_deferred_wakeup, sc);
375 	if (sc->ds_si == NULL) {
376 		dtfree(sc);
377 		return NULL;
378 	}
379 
380 	return sc;
381 }
382 
383 void
dtfree(struct dt_softc * sc)384 dtfree(struct dt_softc *sc)
385 {
386 	struct dt_evt *dtev;
387 	int i;
388 
389 	if (sc->ds_si != NULL)
390 		softintr_disestablish(sc->ds_si);
391 
392 	for (i = 0; i < ncpusfound; i++) {
393 		dtev = sc->ds_cpu[i].dc_ring;
394 		free(dtev, M_DEVBUF, DT_EVTRING_SIZE * sizeof(*dtev));
395 	}
396 	free(sc, M_DEVBUF, sizeof(*sc));
397 }
398 
399 int
dt_ioctl_list_probes(struct dt_softc * sc,struct dtioc_probe * dtpr)400 dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
401 {
402 	struct dtioc_probe_info info, *dtpi;
403 	struct dt_probe *dtp;
404 	size_t size;
405 	int error = 0;
406 
407 	size = dtpr->dtpr_size;
408 	dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi);
409 	if (size == 0)
410 		return 0;
411 
412 	dtpi = dtpr->dtpr_probes;
413 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
414 		if (size < sizeof(*dtpi)) {
415 			error = ENOSPC;
416 			break;
417 		}
418 		memset(&info, 0, sizeof(info));
419 		info.dtpi_pbn = dtp->dtp_pbn;
420 		info.dtpi_nargs = dtp->dtp_nargs;
421 		strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
422 		    sizeof(info.dtpi_prov));
423 		strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
424 		strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
425 		error = copyout(&info, dtpi, sizeof(*dtpi));
426 		if (error)
427 			break;
428 		size -= sizeof(*dtpi);
429 		dtpi++;
430 	}
431 
432 	return error;
433 }
434 
435 int
dt_ioctl_get_args(struct dt_softc * sc,struct dtioc_arg * dtar)436 dt_ioctl_get_args(struct dt_softc *sc, struct dtioc_arg *dtar)
437 {
438 	struct dtioc_arg_info info, *dtai;
439 	struct dt_probe *dtp;
440 	size_t size, n, t;
441 	uint32_t pbn;
442 	int error = 0;
443 
444 	pbn = dtar->dtar_pbn;
445 	if (pbn == 0 || pbn > dt_nprobes)
446 		return EINVAL;
447 
448 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
449 		if (pbn == dtp->dtp_pbn)
450 			break;
451 	}
452 	if (dtp == NULL)
453 		return EINVAL;
454 
455 	if (dtp->dtp_sysnum != 0) {
456 		/* currently not supported for system calls */
457 		dtar->dtar_size = 0;
458 		return 0;
459 	}
460 
461 	size = dtar->dtar_size;
462 	dtar->dtar_size = dtp->dtp_nargs * sizeof(*dtar);
463 	if (size == 0)
464 		return 0;
465 
466 	t = 0;
467 	dtai = dtar->dtar_args;
468 	for (n = 0; n < dtp->dtp_nargs; n++) {
469 		if (size < sizeof(*dtai)) {
470 			error = ENOSPC;
471 			break;
472 		}
473 		if (n >= DTMAXARGTYPES || dtp->dtp_argtype[n] == NULL)
474 			continue;
475 		memset(&info, 0, sizeof(info));
476 		info.dtai_pbn = dtp->dtp_pbn;
477 		info.dtai_argn = t++;
478 		strlcpy(info.dtai_argtype, dtp->dtp_argtype[n],
479 		    sizeof(info.dtai_argtype));
480 		error = copyout(&info, dtai, sizeof(*dtai));
481 		if (error)
482 			break;
483 		size -= sizeof(*dtai);
484 		dtai++;
485 	}
486 	dtar->dtar_size = t * sizeof(*dtar);
487 
488 	return error;
489 }
490 
491 int
dt_ioctl_get_stats(struct dt_softc * sc,struct dtioc_stat * dtst)492 dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
493 {
494 	struct dt_cpubuf *dc;
495 	uint64_t readevt, dropevt, skiptick, recurevt;
496 	int i;
497 
498 	readevt = dropevt = skiptick = 0;
499 	for (i = 0; i < ncpusfound; i++) {
500 		dc = &sc->ds_cpu[i];
501 
502 		membar_consumer();
503 		dropevt += dc->dc_dropevt;
504 		skiptick = dc->dc_skiptick;
505 		recurevt = dc->dc_recurevt;
506 		readevt += dc->dc_readevt;
507 	}
508 
509 	dtst->dtst_readevt = readevt;
510 	dtst->dtst_dropevt = dropevt;
511 	dtst->dtst_skiptick = skiptick;
512 	dtst->dtst_recurevt = recurevt;
513 	return 0;
514 }
515 
/*
 * Start recording: publish every enabled PCB on its probe so the
 * probe sites begin filling the event rings.  Returns EBUSY when
 * already recording, ENOENT when no probe has been enabled yet.
 */
int
dt_ioctl_record_start(struct dt_softc *sc)
{
	uint64_t now;
	struct dt_pcb *dp;

	if (sc->ds_recording)
		return EBUSY;

	KERNEL_ASSERT_LOCKED();
	if (TAILQ_EMPTY(&sc->ds_pcbs))
		return ENOENT;

	rw_enter_write(&dt_lock);
	now = nsecuptime();
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		/* Publish the PCB; probe sites may see it from now on. */
		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
		dtp->dtp_recording++;
		dtp->dtp_prov->dtpv_recording++;

		/* Interval-based PCBs get their own clock interrupt. */
		if (dp->dp_nsecs != 0) {
			clockintr_bind(&dp->dp_clockintr, dp->dp_cpu, dt_clock,
			    dp);
			clockintr_schedule(&dp->dp_clockintr,
			    now + dp->dp_nsecs);
		}
	}
	rw_exit_write(&dt_lock);

	sc->ds_recording = 1;
	dt_tracing++;

	return 0;
}
552 
/*
 * Stop recording: unpublish every PCB and wait until no probe site
 * can still hold a reference to them.  Safe to call when not
 * recording (e.g. from dtclose()).
 */
void
dt_ioctl_record_stop(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	if (!sc->ds_recording)
		return;

	DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);

	dt_tracing--;
	sc->ds_recording = 0;

	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		/*
		 * Set an execution barrier to ensure the shared
		 * reference to dp is inactive.
		 */
		if (dp->dp_nsecs != 0)
			clockintr_unbind(&dp->dp_clockintr, CL_BARRIER);

		dtp->dtp_recording--;
		dtp->dtp_prov->dtpv_recording--;
		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
	}
	rw_exit_write(&dt_lock);

	/* Wait until readers cannot access the PCBs. */
	smr_barrier();
}
586 
587 int
dt_ioctl_probe_enable(struct dt_softc * sc,struct dtioc_req * dtrq)588 dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
589 {
590 	struct dt_pcb_list plist;
591 	struct dt_probe *dtp;
592 	struct dt_pcb *dp;
593 	int error;
594 
595 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
596 		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
597 			break;
598 	}
599 	if (dtp == NULL)
600 		return ENOENT;
601 
602 	/* Only allow one probe of each type. */
603 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
604 		if (dp->dp_dtp->dtp_pbn == dtrq->dtrq_pbn)
605 			return EEXIST;
606 	}
607 
608 	TAILQ_INIT(&plist);
609 	error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
610 	if (error)
611 		return error;
612 
613 	DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
614 	    dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);
615 
616 	/* Append all PCBs to this instance */
617 	TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext);
618 
619 	return 0;
620 }
621 
622 int
dt_ioctl_probe_disable(struct dt_softc * sc,struct dtioc_req * dtrq)623 dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq)
624 {
625 	struct dt_probe *dtp;
626 	int error;
627 
628 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
629 		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
630 			break;
631 	}
632 	if (dtp == NULL)
633 		return ENOENT;
634 
635 	if (dtp->dtp_prov->dtpv_dealloc) {
636 		error = dtp->dtp_prov->dtpv_dealloc(dtp, sc, dtrq);
637 		if (error)
638 			return error;
639 	}
640 
641 	DPRINTF("dt%d: pid %d dealloc\n", sc->ds_unit, sc->ds_pid,
642 	    dtrq->dtrq_pbn);
643 
644 	return 0;
645 }
646 
647 int
dt_ioctl_get_auxbase(struct dt_softc * sc,struct dtioc_getaux * dtga)648 dt_ioctl_get_auxbase(struct dt_softc *sc, struct dtioc_getaux *dtga)
649 {
650 	struct uio uio;
651 	struct iovec iov;
652 	struct process *pr;
653 	struct proc *p = curproc;
654 	AuxInfo auxv[ELF_AUX_ENTRIES];
655 	int i, error;
656 
657 	dtga->dtga_auxbase = 0;
658 
659 	if ((pr = prfind(dtga->dtga_pid)) == NULL)
660 		return ESRCH;
661 
662 	iov.iov_base = auxv;
663 	iov.iov_len = sizeof(auxv);
664 	uio.uio_iov = &iov;
665 	uio.uio_iovcnt = 1;
666 	uio.uio_offset = pr->ps_auxinfo;
667 	uio.uio_resid = sizeof(auxv);
668 	uio.uio_segflg = UIO_SYSSPACE;
669 	uio.uio_procp = p;
670 	uio.uio_rw = UIO_READ;
671 
672 	error = process_domem(p, pr, &uio, PT_READ_D);
673 	if (error)
674 		return error;
675 
676 	for (i = 0; i < ELF_AUX_ENTRIES; i++)
677 		if (auxv[i].au_id == AUX_base)
678 			dtga->dtga_auxbase = auxv[i].au_v;
679 
680 	return 0;
681 }
682 
683 struct dt_probe *
dt_dev_alloc_probe(const char * func,const char * name,struct dt_provider * dtpv)684 dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
685 {
686 	struct dt_probe *dtp;
687 
688 	dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
689 	if (dtp == NULL)
690 		return NULL;
691 
692 	SMR_SLIST_INIT(&dtp->dtp_pcbs);
693 	dtp->dtp_prov = dtpv;
694 	dtp->dtp_func = func;
695 	dtp->dtp_name = name;
696 	dtp->dtp_sysnum = -1;
697 	dtp->dtp_ref = 0;
698 
699 	return dtp;
700 }
701 
/*
 * Assign a unique, non-zero probe number and append the probe to
 * the global list.  Only called at boot time from the providers,
 * hence probe numbers are [I]mmutable afterwards.
 */
void
dt_dev_register_probe(struct dt_probe *dtp)
{
	static uint64_t probe_nb;	/* last probe number handed out */

	dtp->dtp_pbn = ++probe_nb;
	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
}
710 
711 struct dt_pcb *
dt_pcb_alloc(struct dt_probe * dtp,struct dt_softc * sc)712 dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
713 {
714 	struct dt_pcb *dp;
715 
716 	dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
717 	if (dp == NULL)
718 		return NULL;
719 
720 	dp->dp_sc = sc;
721 	dp->dp_dtp = dtp;
722 	return dp;
723 }
724 
/*
 * Release a PCB allocated with dt_pcb_alloc().
 */
void
dt_pcb_free(struct dt_pcb *dp)
{
	free(dp, M_DT, sizeof(*dp));
}
730 
731 void
dt_pcb_purge(struct dt_pcb_list * plist)732 dt_pcb_purge(struct dt_pcb_list *plist)
733 {
734 	struct dt_pcb *dp;
735 
736 	while ((dp = TAILQ_FIRST(plist)) != NULL) {
737 		TAILQ_REMOVE(plist, dp, dp_snext);
738 		dt_pcb_free(dp);
739 	}
740 }
741 
/*
 * Account for `skip' clock ticks during which no event was recorded
 * on this CPU; the sum is reported by dt_ioctl_get_stats().
 */
void
dt_pcb_ring_skiptick(struct dt_pcb *dp, unsigned int skip)
{
	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];

	dc->dc_skiptick += skip;
	membar_producer();	/* pairs with dt_ioctl_get_stats() */
}
750 
/*
 * Get a reference to the next free event state from the ring.
 *
 * Returns NULL, after bumping the matching counter, when the probe
 * fired recursively on this CPU or when the ring is full because
 * read(2) has not consumed enough events yet.  The returned slot
 * must be committed with dt_pcb_ring_consume() on the same CPU.
 */
struct dt_evt *
dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
{
	struct proc *p = curproc;
	struct dt_evt *dtev;
	int prod, cons, distance;
	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];

	/* A probe firing while another event is being recorded: drop. */
	if (dc->dc_inevt == 1) {
		dc->dc_recurevt++;
		membar_producer();
		return NULL;
	}

	dc->dc_inevt = 1;

	/* Pairs with membar_producer() in dt_ring_copy(). */
	membar_consumer();
	prod = dc->dc_prod;
	cons = dc->dc_cons;
	distance = prod - cons;
	if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
		/* read(2) isn't finished */
		dc->dc_dropevt++;
		membar_producer();

		dc->dc_inevt = 0;
		return NULL;
	}

	/*
	 * Save states in next free event slot.
	 */
	dtev = &dc->dc_ring[cons];
	memset(dtev, 0, sizeof(*dtev));

	dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
	dtev->dtev_cpu = cpu_number();
	dtev->dtev_pid = p->p_p->ps_pid;
	dtev->dtev_tid = p->p_tid + THREAD_PID_OFFSET;
	nanotime(&dtev->dtev_tsp);

	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
		strlcpy(dtev->dtev_comm, p->p_p->ps_comm, sizeof(dtev->dtev_comm));

	if (ISSET(dp->dp_evtflags, DTEVT_KSTACK)) {
		/* Skip the tracer's own frames, see DT_FA_* above. */
		if (profiling)
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
		else
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
	}
	if (ISSET(dp->dp_evtflags, DTEVT_USTACK))
		stacktrace_save_utrace(&dtev->dtev_ustack);

	return dtev;
}
809 
/*
 * Commit the event slot returned by dt_pcb_ring_get(): advance the
 * write index, account a new readable event and wake the thread
 * sleeping in dtread().  Must run on the CPU that got the slot.
 */
void
dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
{
	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];

	KASSERT(dtev == &dc->dc_ring[dc->dc_cons]);

	dc->dc_cons = (dc->dc_cons + 1) % DT_EVTRING_SIZE;
	/* Pairs with membar_consumer() in dt_ring_copy(). */
	membar_producer();

	atomic_inc_int(&dp->dp_sc->ds_evtcnt);
	dc->dc_inevt = 0;

	dt_wakeup(dp->dp_sc);
}
825 
/*
 * Copy at most `max' events from `dc', producing the same amount
 * of free slots.
 *
 * Copies the contiguous chunk between the read index (dc_prod) and
 * either the write index (dc_cons) or the end of the ring, then,
 * when the ring wrapped, a second chunk from slot 0.  `*rcvd' is
 * set to the number of events moved even when an error is returned.
 */
int
dt_ring_copy(struct dt_cpubuf *dc, struct uio *uio, size_t max, size_t *rcvd)
{
	size_t count, copied = 0;
	unsigned int cons, prod;
	int error = 0;

	KASSERT(max > 0);

	/* Pairs with membar_producer() in dt_pcb_ring_consume(). */
	membar_consumer();
	cons = dc->dc_cons;
	prod = dc->dc_prod;

	/* # of readable events up to the write index or ring end. */
	if (cons < prod)
		count = DT_EVTRING_SIZE - prod;
	else
		count = cons - prod;

	if (count == 0)
		return 0;

	count = MIN(count, max);
	error = uiomove(&dc->dc_ring[prod], count * sizeof(struct dt_evt), uio);
	if (error)
		return error;
	copied += count;

	/* Produce */
	prod = (prod + count) % DT_EVTRING_SIZE;

	/* If the ring didn't wrap, stop here. */
	if (max == copied || prod != 0 || cons == 0)
		goto out;

	/* Second chunk: events from the start of the ring. */
	count = MIN(cons, (max - copied));
	error = uiomove(&dc->dc_ring[0], count * sizeof(struct dt_evt), uio);
	if (error)
		goto out;

	copied += count;
	prod += count;

out:
	dc->dc_readevt += copied;
	dc->dc_prod = prod;
	/* Publish the freed slots to the recording CPU. */
	membar_producer();

	*rcvd = copied;
	return error;
}
880 
/*
 * Request a wakeup of the thread sleeping in dtread().
 */
void
dt_wakeup(struct dt_softc *sc)
{
	/*
	 * It is not always safe or possible to call wakeup(9) and grab
	 * the SCHED_LOCK() from a given tracepoint.  This is true for
	 * any tracepoint that might trigger inside the scheduler or at
	 * any IPL higher than IPL_SCHED.  For this reason use a soft-
	 * interrupt to defer the wakeup.
	 */
	softintr_schedule(sc->ds_si);
}
893 
/*
 * Soft-interrupt handler: perform the wakeup(9) deferred by
 * dt_wakeup() from a context where it is safe to do so.
 */
void
dt_deferred_wakeup(void *arg)
{
	struct dt_softc *sc = arg;

	wakeup(sc);
}
901