xref: /openbsd/sys/dev/dt/dt_dev.c (revision 097a140d)
1 /*	$OpenBSD: dt_dev.c,v 1.13 2021/04/23 07:21:02 bluhm Exp $ */
2 
3 /*
4  * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/systm.h>
21 #include <sys/param.h>
22 #include <sys/device.h>
23 #include <sys/malloc.h>
24 #include <sys/proc.h>
25 
26 #include <dev/dt/dtvar.h>
27 
28 /*
29  * Number of frames to skip in stack traces.
30  *
31  * The number of frames required to execute dt(4) profiling code
32  * depends on the probe, context, architecture and possibly the
33  * compiler.
34  *
35  * Static probes (tracepoints) are executed in the context of the
36  * current thread and only need to skip frames up to the recording
37  * function.  For example the syscall provider:
38  *
39  *	dt_prov_syscall_entry+0x141
40  *	syscall+0x205		<--- start here
41  *	Xsyscall+0x128
42  *
43  * Probes executed in their own context, like the profile provider,
44  * need to skip the frames of that context which are different for
45  * every architecture.  For example the profile provider executed
46  * from hardclock(9) on amd64:
47  *
48  *	dt_prov_profile_enter+0x6e
49  *	hardclock+0x1a9
50  *	lapic_clockintr+0x3f
51  *	Xresume_lapic_ltimer+0x26
52  *	acpicpu_idle+0x1d2	<---- start here.
53  *	sched_idle+0x225
54  *	proc_trampoline+0x1c
55  */
56 #if defined(__amd64__)
57 #define DT_FA_PROFILE	5
58 #define DT_FA_STATIC	2
59 #elif defined(__powerpc64__)
60 #define DT_FA_PROFILE	6
61 #define DT_FA_STATIC	2
62 #elif defined(__sparc64__)
63 #define DT_FA_PROFILE	5
64 #define DT_FA_STATIC	1
65 #else
66 #define DT_FA_STATIC	0
67 #define DT_FA_PROFILE	0
68 #endif
69 
70 #define DT_EVTRING_SIZE	16	/* # of slots in per PCB event ring */
71 
72 #define DPRINTF(x...) /* nothing */
73 
74 /*
75  * Descriptor associated with each program opening /dev/dt.  It is used
76  * to keep track of enabled PCBs.
77  *
78  *  Locks used to protect struct members in this file:
79  *	m	per-softc mutex
80  *	K	kernel lock
81  */
struct dt_softc {
	SLIST_ENTRY(dt_softc)	 ds_next;	/* [K] descriptor list */
	int			 ds_unit;	/* [I] D_CLONE unique unit */
	pid_t			 ds_pid;	/* [I] PID of tracing program */

	struct mutex		 ds_mtx;	/* protects the [m] members */

	struct dt_pcb_list	 ds_pcbs;	/* [K] list of enabled PCBs */
	struct dt_evt		*ds_bufqueue;	/* [K] copy evts to userland */
	size_t			 ds_bufqlen;	/* [K] length of the queue */
	int			 ds_recording;	/* [K] currently recording? */
	int			 ds_evtcnt;	/* [m] # of readable evts */

	/* Counters */
	uint64_t		 ds_readevt;	/* [m] # of events read */
	uint64_t		 ds_dropevt;	/* [m] # of events dropped */
};
99 
100 SLIST_HEAD(, dt_softc) dtdev_list;	/* [K] list of open /dev/dt nodes */
101 
102 /*
103  * Probes are created during dt_attach() and never modified/freed during
104  * the lifetime of the system.  That's why we consider them as [I]mmutable.
105  */
106 unsigned int			dt_nprobes;	/* [I] # of probes available */
107 SIMPLEQ_HEAD(, dt_probe)	dt_probe_list;	/* [I] list of probes */
108 
109 struct rwlock			dt_lock = RWLOCK_INITIALIZER("dtlk");
110 volatile uint32_t		dt_tracing = 0;	/* [K] # of processes tracing */
111 
112 int allowdt;
113 
114 void	dtattach(struct device *, struct device *, void *);
115 int	dtopen(dev_t, int, int, struct proc *);
116 int	dtclose(dev_t, int, int, struct proc *);
117 int	dtread(dev_t, struct uio *, int);
118 int	dtioctl(dev_t, u_long, caddr_t, int, struct proc *);
119 
120 struct	dt_softc *dtlookup(int);
121 
122 int	dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
123 int	dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
124 int	dt_ioctl_record_start(struct dt_softc *);
125 void	dt_ioctl_record_stop(struct dt_softc *);
126 int	dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
127 void	dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
128 
129 int	dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *);
130 
131 void
132 dtattach(struct device *parent, struct device *self, void *aux)
133 {
134 	SLIST_INIT(&dtdev_list);
135 	SIMPLEQ_INIT(&dt_probe_list);
136 
137 	/* Init providers */
138 	dt_nprobes += dt_prov_profile_init();
139 	dt_nprobes += dt_prov_syscall_init();
140 	dt_nprobes += dt_prov_static_init();
141 
142 	printf("dt: %u probes\n", dt_nprobes);
143 }
144 
145 int
146 dtopen(dev_t dev, int flags, int mode, struct proc *p)
147 {
148 	struct dt_softc *sc;
149 	int unit = minor(dev);
150 
151 	if (!allowdt)
152 		return EPERM;
153 
154 	KASSERT(dtlookup(unit) == NULL);
155 
156 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
157 	if (sc == NULL)
158 		return ENOMEM;
159 
160 	/*
161 	 * Enough space to empty 2 full rings of events in a single read.
162 	 */
163 	sc->ds_bufqlen = 2 * DT_EVTRING_SIZE;
164 	sc->ds_bufqueue = mallocarray(sc->ds_bufqlen, sizeof(*sc->ds_bufqueue),
165 	    M_DEVBUF, M_WAITOK|M_CANFAIL);
166 	if (sc->ds_bufqueue == NULL)
167 		goto bad;
168 
169 	sc->ds_unit = unit;
170 	sc->ds_pid = p->p_p->ps_pid;
171 	TAILQ_INIT(&sc->ds_pcbs);
172 	mtx_init(&sc->ds_mtx, IPL_HIGH);
173 	sc->ds_evtcnt = 0;
174 	sc->ds_readevt = 0;
175 	sc->ds_dropevt = 0;
176 
177 	SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);
178 
179 	DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);
180 
181 	return 0;
182 
183 bad:
184 	free(sc, M_DEVBUF, sizeof(*sc));
185 	return ENOMEM;
186 }
187 
/*
 * Close /dev/dt: stop recording, tear down all enabled PCBs and
 * release the descriptor.  The unit must have been opened (D_CLONE).
 */
int
dtclose(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);

	/* Unlink first so no new lookup can find this softc. */
	SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
	/* Stop recording (includes the SMR barrier) before freeing PCBs. */
	dt_ioctl_record_stop(sc);
	dt_pcb_purge(&sc->ds_pcbs);

	free(sc->ds_bufqueue, M_DEVBUF,
	    sc->ds_bufqlen * sizeof(*sc->ds_bufqueue));
	free(sc, M_DEVBUF, sizeof(*sc));

	return 0;
}
209 
210 int
211 dtread(dev_t dev, struct uio *uio, int flags)
212 {
213 	struct sleep_state sls;
214 	struct dt_softc *sc;
215 	struct dt_evt *estq;
216 	struct dt_pcb *dp;
217 	int error = 0, unit = minor(dev);
218 	size_t qlen, count, read = 0;
219 	uint64_t dropped = 0;
220 
221 	sc = dtlookup(unit);
222 	KASSERT(sc != NULL);
223 
224 	count = howmany(uio->uio_resid, sizeof(struct dt_evt));
225 	if (count < 1)
226 		return (EMSGSIZE);
227 
228 	while (!sc->ds_evtcnt) {
229 		sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread", 0);
230 		error = sleep_finish(&sls, !sc->ds_evtcnt);
231 		if (error == EINTR || error == ERESTART)
232 			break;
233 	}
234 	if (error)
235 		return error;
236 
237 	estq = sc->ds_bufqueue;
238 	qlen = MIN(sc->ds_bufqlen, count);
239 
240 	KERNEL_ASSERT_LOCKED();
241 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
242 		count = dt_pcb_ring_copy(dp, estq, qlen, &dropped);
243 		read += count;
244 		estq += count; /* pointer aritmetic */
245 		qlen -= count;
246 		if (qlen == 0)
247 			break;
248 	}
249 	if (read > 0)
250 		uiomove(sc->ds_bufqueue, read * sizeof(struct dt_evt), uio);
251 
252 	mtx_enter(&sc->ds_mtx);
253 	sc->ds_evtcnt -= read;
254 	sc->ds_readevt += read;
255 	sc->ds_dropevt += dropped;
256 	mtx_leave(&sc->ds_mtx);
257 
258 	return 0;
259 }
260 
/*
 * ioctl(2) entry point.
 *
 * The first switch dispatches the unprivileged commands and filters
 * out unknown ones; only DTIOCRECORD and DTIOCPRBENABLE fall through
 * to the suser(9) check and the second switch.
 */
int
dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);
	int on, error = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	switch (cmd) {
	case DTIOCGPLIST:
		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
	case DTIOCGSTATS:
		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
	case DTIOCRECORD:
	case DTIOCPRBENABLE:
		/* root only ioctl(2) */
		break;
	default:
		return ENOTTY;
	}

	if ((error = suser(p)) != 0)
		return error;

	switch (cmd) {
	case DTIOCRECORD:
		on = *(int *)addr;
		if (on)
			error = dt_ioctl_record_start(sc);
		else
			dt_ioctl_record_stop(sc);
		break;
	case DTIOCPRBENABLE:
		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
		break;
	default:
		/* Unreachable: unknown commands were rejected above. */
		KASSERT(0);
	}

	return error;
}
304 
305 struct dt_softc *
306 dtlookup(int unit)
307 {
308 	struct dt_softc *sc;
309 
310 	KERNEL_ASSERT_LOCKED();
311 
312 	SLIST_FOREACH(sc, &dtdev_list, ds_next) {
313 		if (sc->ds_unit == unit)
314 			break;
315 	}
316 
317 	return sc;
318 }
319 
320 int
321 dtioc_req_isvalid(struct dtioc_req *dtrq)
322 {
323 	switch (dtrq->dtrq_filter.dtf_operand) {
324 	case DT_OP_NONE:
325 	case DT_OP_EQ:
326 	case DT_OP_NE:
327 		break;
328 	default:
329 		return 0;
330 	}
331 
332 	switch (dtrq->dtrq_filter.dtf_variable) {
333 	case DT_FV_NONE:
334 	case DT_FV_PID:
335 	case DT_FV_TID:
336 		break;
337 	default:
338 		return 0;
339 	}
340 
341 	return 1;
342 }
343 
344 int
345 dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
346 {
347 	struct dtioc_probe_info info, *dtpi;
348 	struct dt_probe *dtp;
349 	size_t size;
350 	int error = 0;
351 
352 	size = dtpr->dtpr_size;
353 	dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi);
354 	if (size == 0)
355 		return 0;
356 
357 	dtpi = dtpr->dtpr_probes;
358 	memset(&info, 0, sizeof(info));
359 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
360 		if (size < sizeof(*dtpi)) {
361 			error = ENOSPC;
362 			break;
363 		}
364 		info.dtpi_pbn = dtp->dtp_pbn;
365 		info.dtpi_nargs = dtp->dtp_nargs;
366 		strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
367 		    sizeof(info.dtpi_prov));
368 		strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
369 		strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
370 		error = copyout(&info, dtpi, sizeof(*dtpi));
371 		if (error)
372 			break;
373 		size -= sizeof(*dtpi);
374 		dtpi++;
375 	};
376 
377 	return error;
378 }
379 
380 int
381 dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
382 {
383 	mtx_enter(&sc->ds_mtx);
384 	dtst->dtst_readevt = sc->ds_readevt;
385 	dtst->dtst_dropevt = sc->ds_dropevt;
386 	mtx_leave(&sc->ds_mtx);
387 
388 	return 0;
389 }
390 
/*
 * Start recording: link every enabled PCB of this descriptor onto its
 * probe's SMR list so the providers start emitting events.
 *
 * Returns EBUSY if already recording, ENOENT if no probe was enabled.
 */
int
dt_ioctl_record_start(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	if (sc->ds_recording)
		return EBUSY;

	KERNEL_ASSERT_LOCKED();
 	if (TAILQ_EMPTY(&sc->ds_pcbs))
		return ENOENT;

	/* dt_lock serializes updates to the per-probe PCB lists. */
	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
		dtp->dtp_recording++;
		dtp->dtp_prov->dtpv_recording++;
	}
	rw_exit_write(&dt_lock);

	sc->ds_recording = 1;
	dt_tracing++;

	return 0;
}
418 
/*
 * Stop recording: undo dt_ioctl_record_start() in reverse order and
 * wait for concurrent SMR readers before the caller may free the PCBs.
 * No-op if this descriptor is not currently recording.
 */
void
dt_ioctl_record_stop(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	KASSERT(suser(curproc) == 0);

	if (!sc->ds_recording)
		return;

	DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);

	dt_tracing--;
	sc->ds_recording = 0;

	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		dtp->dtp_recording--;
		dtp->dtp_prov->dtpv_recording--;
		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
	}
	rw_exit_write(&dt_lock);

	/* Wait until readers cannot access the PCBs. */
	smr_barrier();
}
447 
/*
 * Enable a probe for this descriptor: validate the request, find the
 * probe by number and let its provider allocate the PCB(s), which are
 * then appended to the descriptor's list.  Recording starts later via
 * DTIOCRECORD.
 */
int
dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
{
	struct dt_pcb_list plist;
	struct dt_probe *dtp;
	int error;

	KASSERT(suser(curproc) == 0);

	if (!dtioc_req_isvalid(dtrq))
		return EINVAL;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
			break;
	}
	if (dtp == NULL)
		return ENOENT;

	/* The provider may allocate more than one PCB onto `plist'. */
	TAILQ_INIT(&plist);
	error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
	if (error)
		return error;

	DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
	    dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);

	/* Append all PCBs to this instance */
	TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext);

	return 0;
}
480 
481 struct dt_probe *
482 dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
483 {
484 	struct dt_probe *dtp;
485 
486 	dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
487 	if (dtp == NULL)
488 		return NULL;
489 
490 	SMR_SLIST_INIT(&dtp->dtp_pcbs);
491 	dtp->dtp_prov = dtpv;
492 	dtp->dtp_func = func;
493 	dtp->dtp_name = name;
494 	dtp->dtp_sysnum = -1;
495 
496 	return dtp;
497 }
498 
499 void
500 dt_dev_register_probe(struct dt_probe *dtp)
501 {
502 	static uint64_t probe_nb;
503 
504 	dtp->dtp_pbn = ++probe_nb;
505 	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
506 }
507 
508 struct dt_pcb *
509 dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
510 {
511 	struct dt_pcb *dp;
512 
513 	dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
514 	if (dp == NULL)
515 		goto bad;
516 
517 	dp->dp_ring = mallocarray(DT_EVTRING_SIZE, sizeof(*dp->dp_ring), M_DT,
518 	    M_WAITOK|M_CANFAIL|M_ZERO);
519 	if (dp->dp_ring == NULL)
520 		goto bad;
521 
522 	mtx_init(&dp->dp_mtx, IPL_HIGH);
523 	dp->dp_sc = sc;
524 	dp->dp_dtp = dtp;
525 	return dp;
526 bad:
527 	dt_pcb_free(dp);
528 	return NULL;
529 }
530 
531 void
532 dt_pcb_free(struct dt_pcb *dp)
533 {
534 	if (dp == NULL)
535 		return;
536 	free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring));
537 	free(dp, M_DT, sizeof(*dp));
538 }
539 
540 void
541 dt_pcb_purge(struct dt_pcb_list *plist)
542 {
543 	struct dt_pcb *dp;
544 
545 	while ((dp = TAILQ_FIRST(plist)) != NULL) {
546 		TAILQ_REMOVE(plist, dp, dp_snext);
547 		dt_pcb_free(dp);
548 	}
549 }
550 
551 int
552 dt_pcb_filter(struct dt_pcb *dp)
553 {
554 	struct dt_filter *dtf = &dp->dp_filter;
555 	struct proc *p = curproc;
556 	unsigned int var;
557 	int match = 1;
558 
559 	/* Filter out tracing program. */
560 	if (dp->dp_sc->ds_pid == p->p_p->ps_pid)
561 		return 1;
562 
563 	switch (dtf->dtf_variable) {
564 	case DT_FV_PID:
565 		var = p->p_p->ps_pid;
566 		break;
567 	case DT_FV_TID:
568 		var = p->p_tid;
569 		break;
570 	case DT_FV_NONE:
571 		break;
572 	default:
573 		KASSERT(0);
574 	}
575 
576 	switch (dtf->dtf_operand) {
577 	case DT_OP_EQ:
578 		match = !!(var == dtf->dtf_value);
579 		break;
580 	case DT_OP_NE:
581 		match = !!(var != dtf->dtf_value);
582 		break;
583 	case DT_OP_NONE:
584 		break;
585 	default:
586 		KASSERT(0);
587 	}
588 
589 	return !match;
590 }
591 
592 
593 /*
594  * Get a reference to the next free event state from the ring.
595  */
/*
 * Get a reference to the next free event state from the ring.
 *
 * On success the slot is returned with dp->dp_mtx held; the caller
 * must hand it back via dt_pcb_ring_consume().  Returns NULL (with the
 * mutex released) when the event is filtered or the ring is full.
 */
struct dt_evt *
dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
{
	struct proc *p = curproc;
	struct dt_evt *dtev;
	int distance;

	if (dt_pcb_filter(dp))
		return NULL;

	mtx_enter(&dp->dp_mtx);
	/*
	 * Writers fill slots at dp_cons while dt_pcb_ring_copy() drains
	 * from dp_prod; a distance of one slot (in either wrap direction)
	 * means the ring is full.
	 */
	distance = dp->dp_prod - dp->dp_cons;
	if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
		/* read(2) isn't finished */
		dp->dp_dropevt++;
		mtx_leave(&dp->dp_mtx);
		return NULL;
	}

	/*
	 * Save states in next free event slot.
	 */
	dtev = &dp->dp_ring[dp->dp_cons];
	memset(dtev, 0, sizeof(*dtev));

	dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
	dtev->dtev_cpu = cpu_number();
	dtev->dtev_pid = p->p_p->ps_pid;
	dtev->dtev_tid = p->p_tid;
	nanotime(&dtev->dtev_tsp);

	/* Slot was zeroed above, so the copied name stays NUL-terminated. */
	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
		memcpy(dtev->dtev_comm, p->p_p->ps_comm, DTMAXCOMLEN - 1);

	if (ISSET(dp->dp_evtflags, DTEVT_KSTACK|DTEVT_USTACK)) {
		/* Skip the frames of the probe context, see DT_FA_*. */
		if (profiling)
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
		else
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
	}

	return dtev;
}
639 
/*
 * Commit the event slot returned by dt_pcb_ring_get() and release
 * dp->dp_mtx (which must still be held on entry).  Bumps the softc's
 * readable-event count and wakes up any sleeping reader.
 */
void
dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
{
	MUTEX_ASSERT_LOCKED(&dp->dp_mtx);
	KASSERT(dtev == &dp->dp_ring[dp->dp_cons]);

	dp->dp_cons = (dp->dp_cons + 1) % DT_EVTRING_SIZE;
	mtx_leave(&dp->dp_mtx);

	mtx_enter(&dp->dp_sc->ds_mtx);
	dp->dp_sc->ds_evtcnt++;
	mtx_leave(&dp->dp_sc->ds_mtx);
	wakeup(dp->dp_sc);
}
654 
655 /*
656  * Copy at most `qlen' events from `dp', producing the same amount
657  * of free slots.
658  */
/*
 * Copy at most `qlen' events from `dp', producing the same amount
 * of free slots.
 *
 * NOTE: the naming here is inverted with respect to the usual ring
 * convention: events are written at dp_cons (dt_pcb_ring_get) and
 * drained from dp_prod (this function).  Dropped-event counts are
 * accumulated into `*dropped'.  Returns the number of events copied
 * into `estq'.
 */
int
dt_pcb_ring_copy(struct dt_pcb *dp, struct dt_evt *estq, size_t qlen,
    uint64_t *dropped)
{
	size_t count, copied = 0;
	unsigned int cons, prod;

	KASSERT(qlen > 0);

	mtx_enter(&dp->dp_mtx);
	cons = dp->dp_cons;
	prod = dp->dp_prod;

	/*
	 * First chunk: either up to the end of the ring (wrapped case)
	 * or up to the write index.  cons == prod means the ring is empty.
	 */
	if (cons < prod)
		count = DT_EVTRING_SIZE - prod;
	else
		count = cons - prod;

	if (count == 0)
		goto out;

	*dropped += dp->dp_dropevt;
	dp->dp_dropevt = 0;

	count = MIN(count, qlen);

	memcpy(&estq[0], &dp->dp_ring[prod], count * sizeof(*estq));
	copied += count;

	/* Produce */
	prod = (prod + count) % DT_EVTRING_SIZE;

	/* If the queue is full or the ring didn't wrap, stop here. */
	if (qlen == copied || prod != 0 || cons == 0)
		goto out;

	/* Second chunk: the wrapped-around part at the start of the ring. */
	count = MIN(cons, (qlen - copied));
	memcpy(&estq[copied], &dp->dp_ring[0], count * sizeof(*estq));
	copied += count;
	prod += count;

out:
	dp->dp_prod = prod;
	mtx_leave(&dp->dp_mtx);
	return copied;
}
705