/*	$OpenBSD: dt_dev.c,v 1.42 2024/12/04 09:37:33 mpi Exp $ */

/*
 * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/clockintr.h>
#include <sys/device.h>
#include <sys/exec_elf.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/ptrace.h>

#include <machine/intr.h>

#include <dev/dt/dtvar.h>
/*
 * Number of frames to skip in stack traces.
 *
 * The number of frames required to execute dt(4) profiling code
 * depends on the probe, context, architecture and possibly the
 * compiler.
 *
 * Static probes (tracepoints) are executed in the context of the
 * current thread and only need to skip frames up to the recording
 * function.  For example the syscall provider:
 *
 *	dt_prov_syscall_entry+0x141
 *	syscall+0x205			<--- start here
 *	Xsyscall+0x128
 *
 * Probes executed in their own context, like the profile provider,
 * need to skip the frames of that context which are different for
 * every architecture.  For example the profile provider executed
 * from hardclock(9) on amd64:
 *
 *	dt_prov_profile_enter+0x6e
 *	hardclock+0x1a9
 *	lapic_clockintr+0x3f
 *	Xresume_lapic_ltimer+0x26
 *	acpicpu_idle+0x1d2		<--- start here
 *	sched_idle+0x225
 *	proc_trampoline+0x1c
 */
#if defined(__amd64__)
#define DT_FA_PROFILE	5
#define DT_FA_STATIC	2
#elif defined(__i386__)
#define DT_FA_PROFILE	5
#define DT_FA_STATIC	2
#elif defined(__macppc__)
#define DT_FA_PROFILE	5
#define DT_FA_STATIC	2
#elif defined(__octeon__)
#define DT_FA_PROFILE	6
#define DT_FA_STATIC	2
#elif defined(__powerpc64__)
#define DT_FA_PROFILE	6
#define DT_FA_STATIC	2
#elif defined(__sparc64__)
#define DT_FA_PROFILE	7
#define DT_FA_STATIC	1
#else
#define DT_FA_STATIC	0
#define DT_FA_PROFILE	0
#endif
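
/*
 * Illustrative sketch (not part of the build): the skip counts above
 * are consumed by dt_pcb_ring_get() below, which picks one depending
 * on how the probe fired:
 *
 *	if (profiling)
 *		stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
 *	else
 *		stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
 */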

#define DT_EVTRING_SIZE	16	/* # of slots in per-CPU event ring */

#define DPRINTF(x...)	/* nothing */

/*
 * Per-CPU Event States
 *
 * Locks used to protect struct members:
 *	r	owned by thread doing read(2)
 *	c	owned by CPU
 *	s	sliced ownership, based on read/write indexes
 *	p	written by CPU, read by thread doing read(2)
 */
struct dt_cpubuf {
	unsigned int		 dc_prod;	/* [r] read index */
	unsigned int		 dc_cons;	/* [c] write index */
	struct dt_evt		*dc_ring;	/* [s] ring of event states */
	unsigned int		 dc_inevt;	/* [c] in event already? */

	/* Counters */
	unsigned int		 dc_dropevt;	/* [p] # of events dropped */
	unsigned int		 dc_skiptick;	/* [p] # of ticks skipped */
	unsigned int		 dc_recurevt;	/* [p] # of recursive events */
	unsigned int		 dc_readevt;	/* [r] # of events read */
};
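
/*
 * Illustrative pairing (not exhaustive): the CPU owning a buffer
 * publishes a new event by advancing dc_cons in dt_pcb_ring_consume()
 * and then calling membar_producer(); the thread in read(2) pairs
 * with it in dt_ring_copy() by calling membar_consumer() before
 * loading dc_cons and dc_prod.
 */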

/*
 * Descriptor associated with each program opening /dev/dt.  It is used
 * to keep track of enabled PCBs.
 *
 * Locks used to protect struct members in this file:
 *	a	atomic
 *	K	kernel lock
 *	r	owned by thread doing read(2)
 *	I	invariant after initialization
 */
struct dt_softc {
	SLIST_ENTRY(dt_softc)	 ds_next;	/* [K] descriptor list */
	int			 ds_unit;	/* [I] D_CLONE unique unit */
	pid_t			 ds_pid;	/* [I] PID of tracing program */
	void			*ds_si;		/* [I] to defer wakeup(9) */

	struct dt_pcb_list	 ds_pcbs;	/* [K] list of enabled PCBs */
	int			 ds_recording;	/* [K] currently recording? */
	unsigned int		 ds_evtcnt;	/* [a] # of readable evts */

	struct dt_cpubuf	 ds_cpu[MAXCPUS]; /* [I] per-CPU event states */
	unsigned int		 ds_lastcpu;	/* [r] last CPU ring read(2) */
};

SLIST_HEAD(, dt_softc) dtdev_list;	/* [K] list of open /dev/dt nodes */

/*
 * Probes are created during dt_attach() and never modified/freed during
 * the lifetime of the system.  That's why we consider them as [I]mmutable.
 */
unsigned int		dt_nprobes;	/* [I] # of probes available */
SIMPLEQ_HEAD(, dt_probe) dt_probe_list;	/* [I] list of probes */

struct rwlock		dt_lock = RWLOCK_INITIALIZER("dtlk");
volatile uint32_t	dt_tracing = 0;	/* [K] # of processes tracing */

int			allowdt;	/* [a] */
void	dtattach(struct device *, struct device *, void *);
int	dtopen(dev_t, int, int, struct proc *);
int	dtclose(dev_t, int, int, struct proc *);
int	dtread(dev_t, struct uio *, int);
int	dtioctl(dev_t, u_long, caddr_t, int, struct proc *);

struct	dt_softc *dtlookup(int);
struct	dt_softc *dtalloc(void);
void	dtfree(struct dt_softc *);

int	dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
int	dt_ioctl_get_args(struct dt_softc *, struct dtioc_arg *);
int	dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
int	dt_ioctl_record_start(struct dt_softc *);
void	dt_ioctl_record_stop(struct dt_softc *);
int	dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
int	dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
int	dt_ioctl_get_auxbase(struct dt_softc *, struct dtioc_getaux *);

int	dt_ring_copy(struct dt_cpubuf *, struct uio *, size_t, size_t *);

void	dt_wakeup(struct dt_softc *);
void	dt_deferred_wakeup(void *);

void
dtattach(struct device *parent, struct device *self, void *aux)
{
	SLIST_INIT(&dtdev_list);
	SIMPLEQ_INIT(&dt_probe_list);

	/* Init providers */
	dt_nprobes += dt_prov_profile_init();
	dt_nprobes += dt_prov_syscall_init();
	dt_nprobes += dt_prov_static_init();
#ifdef DDBPROF
	dt_nprobes += dt_prov_kprobe_init();
#endif
}

int
dtopen(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);

	if (atomic_load_int(&allowdt) == 0)
		return EPERM;

	sc = dtalloc();
	if (sc == NULL)
		return ENOMEM;

	/* no sleep after this point */
	if (dtlookup(unit) != NULL) {
		dtfree(sc);
		return EBUSY;
	}

	sc->ds_unit = unit;
	sc->ds_pid = p->p_p->ps_pid;
	TAILQ_INIT(&sc->ds_pcbs);
	sc->ds_lastcpu = 0;
	sc->ds_evtcnt = 0;

	SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);

	DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);

	return 0;
}

int
dtclose(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);

	SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
	dt_ioctl_record_stop(sc);
	dt_pcb_purge(&sc->ds_pcbs);
	dtfree(sc);

	return 0;
}

int
dtread(dev_t dev, struct uio *uio, int flags)
{
	struct dt_softc *sc;
	struct dt_cpubuf *dc;
	int i, error = 0, unit = minor(dev);
	size_t count, max, read = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	max = howmany(uio->uio_resid, sizeof(struct dt_evt));
	if (max < 1)
		return (EMSGSIZE);

	while (!atomic_load_int(&sc->ds_evtcnt)) {
		sleep_setup(sc, PWAIT | PCATCH, "dtread");
		error = sleep_finish(0, !atomic_load_int(&sc->ds_evtcnt));
		if (error == EINTR || error == ERESTART)
			break;
	}
	if (error)
		return error;

	KERNEL_ASSERT_LOCKED();
	for (i = 0; i < ncpusfound; i++) {
		count = 0;
		dc = &sc->ds_cpu[(sc->ds_lastcpu + i) % ncpusfound];
		error = dt_ring_copy(dc, uio, max, &count);
		if (error && count == 0)
			break;

		read += count;
		max -= count;
		if (max == 0)
			break;
	}
	sc->ds_lastcpu += i % ncpusfound;

	atomic_sub_int(&sc->ds_evtcnt, read);

	return error;
}

int
dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);
	int on, error = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	switch (cmd) {
	case DTIOCGPLIST:
		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
	case DTIOCGARGS:
		return dt_ioctl_get_args(sc, (struct dtioc_arg *)addr);
	case DTIOCGSTATS:
		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
	case DTIOCRECORD:
	case DTIOCPRBENABLE:
	case DTIOCPRBDISABLE:
	case DTIOCGETAUXBASE:
		/* root only ioctl(2) */
		break;
	default:
		return ENOTTY;
	}

	if ((error = suser(p)) != 0)
		return error;

	switch (cmd) {
	case DTIOCRECORD:
		on = *(int *)addr;
		if (on)
			error = dt_ioctl_record_start(sc);
		else
			dt_ioctl_record_stop(sc);
		break;
	case DTIOCPRBENABLE:
		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
		break;
	case DTIOCPRBDISABLE:
		error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr);
		break;
	case DTIOCGETAUXBASE:
		error = dt_ioctl_get_auxbase(sc, (struct dtioc_getaux *)addr);
		break;
	default:
		KASSERT(0);
	}

	return error;
}

struct dt_softc *
dtlookup(int unit)
{
	struct dt_softc *sc;

	KERNEL_ASSERT_LOCKED();

	SLIST_FOREACH(sc, &dtdev_list, ds_next) {
		if (sc->ds_unit == unit)
			break;
	}

	return sc;
}

struct dt_softc *
dtalloc(void)
{
	struct dt_softc *sc;
	struct dt_evt *dtev;
	int i;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
	if (sc == NULL)
		return NULL;

	for (i = 0; i < ncpusfound; i++) {
		dtev = mallocarray(DT_EVTRING_SIZE, sizeof(*dtev), M_DEVBUF,
		    M_WAITOK|M_CANFAIL|M_ZERO);
		if (dtev == NULL)
			break;
		sc->ds_cpu[i].dc_ring = dtev;
	}
	if (i < ncpusfound) {
		dtfree(sc);
		return NULL;
	}

	sc->ds_si = softintr_establish(IPL_SOFTCLOCK, dt_deferred_wakeup, sc);
	if (sc->ds_si == NULL) {
		dtfree(sc);
		return NULL;
	}

	return sc;
}

void
dtfree(struct dt_softc *sc)
{
	struct dt_evt *dtev;
	int i;

	if (sc->ds_si != NULL)
		softintr_disestablish(sc->ds_si);

	for (i = 0; i < ncpusfound; i++) {
		dtev = sc->ds_cpu[i].dc_ring;
		free(dtev, M_DEVBUF, DT_EVTRING_SIZE * sizeof(*dtev));
	}
	free(sc, M_DEVBUF, sizeof(*sc));
}

int
dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
{
	struct dtioc_probe_info info, *dtpi;
	struct dt_probe *dtp;
	size_t size;
	int error = 0;

	size = dtpr->dtpr_size;
	dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi);
	if (size == 0)
		return 0;

	dtpi = dtpr->dtpr_probes;
	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (size < sizeof(*dtpi)) {
			error = ENOSPC;
			break;
		}
		memset(&info, 0, sizeof(info));
		info.dtpi_pbn = dtp->dtp_pbn;
		info.dtpi_nargs = dtp->dtp_nargs;
		strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
		    sizeof(info.dtpi_prov));
		strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
		strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
		error = copyout(&info, dtpi, sizeof(*dtpi));
		if (error)
			break;
		size -= sizeof(*dtpi);
		dtpi++;
	}

	return error;
}

int
dt_ioctl_get_args(struct dt_softc *sc, struct dtioc_arg *dtar)
{
	struct dtioc_arg_info info, *dtai;
	struct dt_probe *dtp;
	size_t size, n, t;
	uint32_t pbn;
	int error = 0;

	pbn = dtar->dtar_pbn;
	if (pbn == 0 || pbn > dt_nprobes)
		return EINVAL;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (pbn == dtp->dtp_pbn)
			break;
	}
	if (dtp == NULL)
		return EINVAL;

	if (dtp->dtp_sysnum != -1) {
		/* currently not supported for system calls */
		dtar->dtar_size = 0;
		return 0;
	}

	size = dtar->dtar_size;
	dtar->dtar_size = dtp->dtp_nargs * sizeof(*dtai);
	if (size == 0)
		return 0;

	t = 0;
	dtai = dtar->dtar_args;
	for (n = 0; n < dtp->dtp_nargs; n++) {
		if (size < sizeof(*dtai)) {
			error = ENOSPC;
			break;
		}
		if (n >= DTMAXARGTYPES || dtp->dtp_argtype[n] == NULL)
			continue;
		memset(&info, 0, sizeof(info));
		info.dtai_pbn = dtp->dtp_pbn;
		info.dtai_argn = t++;
		strlcpy(info.dtai_argtype, dtp->dtp_argtype[n],
		    sizeof(info.dtai_argtype));
		error = copyout(&info, dtai, sizeof(*dtai));
		if (error)
			break;
		size -= sizeof(*dtai);
		dtai++;
	}
	dtar->dtar_size = t * sizeof(*dtai);

	return error;
}

int
dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
{
	struct dt_cpubuf *dc;
	uint64_t readevt, dropevt, skiptick, recurevt;
	int i;

	readevt = dropevt = skiptick = recurevt = 0;
	for (i = 0; i < ncpusfound; i++) {
		dc = &sc->ds_cpu[i];

		membar_consumer();
		dropevt += dc->dc_dropevt;
		skiptick += dc->dc_skiptick;
		recurevt += dc->dc_recurevt;
		readevt += dc->dc_readevt;
	}

	dtst->dtst_readevt = readevt;
	dtst->dtst_dropevt = dropevt;
	dtst->dtst_skiptick = skiptick;
	dtst->dtst_recurevt = recurevt;
	return 0;
}

int
dt_ioctl_record_start(struct dt_softc *sc)
{
	uint64_t now;
	struct dt_pcb *dp;

	if (sc->ds_recording)
		return EBUSY;

	KERNEL_ASSERT_LOCKED();
	if (TAILQ_EMPTY(&sc->ds_pcbs))
		return ENOENT;

	rw_enter_write(&dt_lock);
	now = nsecuptime();
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
		dtp->dtp_recording++;
		dtp->dtp_prov->dtpv_recording++;

		if (dp->dp_nsecs != 0) {
			clockintr_bind(&dp->dp_clockintr, dp->dp_cpu, dt_clock,
			    dp);
			clockintr_schedule(&dp->dp_clockintr,
			    now + dp->dp_nsecs);
		}
	}
	rw_exit_write(&dt_lock);

	sc->ds_recording = 1;
	dt_tracing++;

	return 0;
}

void
dt_ioctl_record_stop(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	if (!sc->ds_recording)
		return;

	DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);

	dt_tracing--;
	sc->ds_recording = 0;

	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		/*
		 * Set an execution barrier to ensure the shared
		 * reference to dp is inactive.
		 */
		if (dp->dp_nsecs != 0)
			clockintr_unbind(&dp->dp_clockintr, CL_BARRIER);

		dtp->dtp_recording--;
		dtp->dtp_prov->dtpv_recording--;
		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
	}
	rw_exit_write(&dt_lock);

	/* Wait until readers cannot access the PCBs. */
	smr_barrier();
}

int
dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
{
	struct dt_pcb_list plist;
	struct dt_probe *dtp;
	struct dt_pcb *dp;
	int error;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
			break;
	}
	if (dtp == NULL)
		return ENOENT;

	/* Only allow one probe of each type. */
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		if (dp->dp_dtp->dtp_pbn == dtrq->dtrq_pbn)
			return EEXIST;
	}

	TAILQ_INIT(&plist);
	error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
	if (error)
		return error;

	DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
	    dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);

	/* Append all PCBs to this instance */
	TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext);

	return 0;
}

int
dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq)
{
	struct dt_probe *dtp;
	int error;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
			break;
	}
	if (dtp == NULL)
		return ENOENT;

	if (dtp->dtp_prov->dtpv_dealloc) {
		error = dtp->dtp_prov->dtpv_dealloc(dtp, sc, dtrq);
		if (error)
			return error;
	}

	DPRINTF("dt%d: pid %d dealloc %u\n", sc->ds_unit, sc->ds_pid,
	    dtrq->dtrq_pbn);

	return 0;
}

int
dt_ioctl_get_auxbase(struct dt_softc *sc, struct dtioc_getaux *dtga)
{
	struct uio uio;
	struct iovec iov;
	struct process *pr;
	struct proc *p = curproc;
	AuxInfo auxv[ELF_AUX_ENTRIES];
	int i, error;

	dtga->dtga_auxbase = 0;

	if ((pr = prfind(dtga->dtga_pid)) == NULL)
		return ESRCH;

	iov.iov_base = auxv;
	iov.iov_len = sizeof(auxv);
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = pr->ps_auxinfo;
	uio.uio_resid = sizeof(auxv);
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_procp = p;
	uio.uio_rw = UIO_READ;

	error = process_domem(p, pr, &uio, PT_READ_D);
	if (error)
		return error;

	for (i = 0; i < ELF_AUX_ENTRIES; i++)
		if (auxv[i].au_id == AUX_base)
			dtga->dtga_auxbase = auxv[i].au_v;

	return 0;
}

struct dt_probe *
dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
{
	struct dt_probe *dtp;

	dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
	if (dtp == NULL)
		return NULL;

	SMR_SLIST_INIT(&dtp->dtp_pcbs);
	dtp->dtp_prov = dtpv;
	dtp->dtp_func = func;
	dtp->dtp_name = name;
	dtp->dtp_sysnum = -1;
	dtp->dtp_ref = 0;

	return dtp;
}

void
dt_dev_register_probe(struct dt_probe *dtp)
{
	static uint64_t probe_nb;

	dtp->dtp_pbn = ++probe_nb;
	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
}

struct dt_pcb *
dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
{
	struct dt_pcb *dp;

	dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
	if (dp == NULL)
		return NULL;

	dp->dp_sc = sc;
	dp->dp_dtp = dtp;
	return dp;
}

void
dt_pcb_free(struct dt_pcb *dp)
{
	free(dp, M_DT, sizeof(*dp));
}

void
dt_pcb_purge(struct dt_pcb_list *plist)
{
	struct dt_pcb *dp;

	while ((dp = TAILQ_FIRST(plist)) != NULL) {
		TAILQ_REMOVE(plist, dp, dp_snext);
		dt_pcb_free(dp);
	}
}

void
dt_pcb_ring_skiptick(struct dt_pcb *dp, unsigned int skip)
{
	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];

	dc->dc_skiptick += skip;
	membar_producer();
}

/*
 * Get a reference to the next free event state from the ring.
 */
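
/*
 * Worked example (illustrative): with DT_EVTRING_SIZE == 16 the ring
 * is full when the write index (dc_cons) sits one slot behind the
 * read index (dc_prod), either directly (prod - cons == 1) or after
 * the writer wrapped (prod - cons == 1 - DT_EVTRING_SIZE, e.g.
 * prod == 0 and cons == 15).  Both cases make dt_pcb_ring_get()
 * drop the event instead of overwriting unread slots.
 */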
struct dt_evt *
dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
{
	struct proc *p = curproc;
	struct dt_evt *dtev;
	int prod, cons, distance;
	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];

	if (dc->dc_inevt == 1) {
		dc->dc_recurevt++;
		membar_producer();
		return NULL;
	}

	dc->dc_inevt = 1;

	membar_consumer();
	prod = dc->dc_prod;
	cons = dc->dc_cons;
	distance = prod - cons;
	if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
		/* read(2) isn't finished */
		dc->dc_dropevt++;
		membar_producer();

		dc->dc_inevt = 0;
		return NULL;
	}

	/*
	 * Save states in next free event slot.
	 */
	dtev = &dc->dc_ring[cons];
	memset(dtev, 0, sizeof(*dtev));

	dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
	dtev->dtev_cpu = cpu_number();
	dtev->dtev_pid = p->p_p->ps_pid;
	dtev->dtev_tid = p->p_tid + THREAD_PID_OFFSET;
	nanotime(&dtev->dtev_tsp);

	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
		strlcpy(dtev->dtev_comm, p->p_p->ps_comm,
		    sizeof(dtev->dtev_comm));

	if (ISSET(dp->dp_evtflags, DTEVT_KSTACK)) {
		if (profiling)
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
		else
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
	}
	if (ISSET(dp->dp_evtflags, DTEVT_USTACK))
		stacktrace_save_utrace(&dtev->dtev_ustack);

	return dtev;
}

void
dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
{
	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];

	KASSERT(dtev == &dc->dc_ring[dc->dc_cons]);

	dc->dc_cons = (dc->dc_cons + 1) % DT_EVTRING_SIZE;
	membar_producer();

	atomic_inc_int(&dp->dp_sc->ds_evtcnt);
	dc->dc_inevt = 0;

	dt_wakeup(dp->dp_sc);
}

/*
 * Copy at most `max' events from `dc', producing the same amount
 * of free slots.
 */
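
/*
 * Worked example (illustrative): with DT_EVTRING_SIZE == 16,
 * prod == 14 and cons == 4, the first uiomove(9) copies the two
 * events in slots 14-15 (DT_EVTRING_SIZE - prod), prod wraps to 0,
 * and, if `max' allows, a second uiomove(9) copies the four events
 * in slots 0-3 (cons).
 */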
int
dt_ring_copy(struct dt_cpubuf *dc, struct uio *uio, size_t max, size_t *rcvd)
{
	size_t count, copied = 0;
	unsigned int cons, prod;
	int error = 0;

	KASSERT(max > 0);

	membar_consumer();
	cons = dc->dc_cons;
	prod = dc->dc_prod;

	if (cons < prod)
		count = DT_EVTRING_SIZE - prod;
	else
		count = cons - prod;

	if (count == 0)
		return 0;

	count = MIN(count, max);
	error = uiomove(&dc->dc_ring[prod], count * sizeof(struct dt_evt), uio);
	if (error)
		return error;
	copied += count;

	/* Produce */
	prod = (prod + count) % DT_EVTRING_SIZE;

	/* If the ring didn't wrap, stop here. */
	if (max == copied || prod != 0 || cons == 0)
		goto out;

	count = MIN(cons, (max - copied));
	error = uiomove(&dc->dc_ring[0], count * sizeof(struct dt_evt), uio);
	if (error)
		goto out;

	copied += count;
	prod += count;

out:
	dc->dc_readevt += copied;
	dc->dc_prod = prod;
	membar_producer();

	*rcvd = copied;
	return error;
}

void
dt_wakeup(struct dt_softc *sc)
{
	/*
	 * It is not always safe or possible to call wakeup(9) and grab
	 * the SCHED_LOCK() from a given tracepoint.  This is true for
	 * any tracepoint that might trigger inside the scheduler or at
	 * any IPL higher than IPL_SCHED.  For this reason use a soft-
	 * interrupt to defer the wakeup.
	 */
	softintr_schedule(sc->ds_si);
}

void
dt_deferred_wakeup(void *arg)
{
	struct dt_softc *sc = arg;

	wakeup(sc);
}