xref: /openbsd/sys/dev/dt/dt_dev.c (revision 4cfece93)
1 /*	$OpenBSD: dt_dev.c,v 1.8 2020/07/04 08:06:07 anton Exp $ */
2 
3 /*
4  * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/systm.h>
21 #include <sys/param.h>
22 #include <sys/device.h>
23 #include <sys/malloc.h>
24 #include <sys/proc.h>
25 
26 #include <dev/dt/dtvar.h>
27 
28 /*
29  * Number of frames to skip in stack traces.
30  *
31  * The number of frames required to execute dt(4) profiling code
32  * depends on the probe, context, architecture and possibly the
33  * compiler.
34  *
35  * Static probes (tracepoints) are executed in the context of the
36  * current thread and only need to skip frames up to the recording
37  * function.  For example the syscall provider:
38  *
39  *	dt_prov_syscall_entry+0x141
40  *	syscall+0x205		<--- start here
41  *	Xsyscall+0x128
42  *
43  * Probes executed in their own context, like the profile provider,
44  * need to skip the frames of that context which are different for
45  * every architecture.  For example the profile provider executed
46  * from hardclock(9) on amd64:
47  *
48  *	dt_prov_profile_enter+0x6e
49  *	hardclock+0x1a9
50  *	lapic_clockintr+0x3f
51  *	Xresume_lapic_ltimer+0x26
52  *	acpicpu_idle+0x1d2	<---- start here.
53  *	sched_idle+0x225
54  *	proc_trampoline+0x1c
55  */
56 #if defined(__amd64__)
57 #define DT_FA_PROFILE	5
58 #define DT_FA_STATIC	2
59 #elif defined(__sparc64__)
60 #define DT_FA_PROFILE	5
61 #define DT_FA_STATIC	1
62 #else
63 #define DT_FA_STATIC	0
64 #define DT_FA_PROFILE	0
65 #endif
66 
67 #define DT_EVTRING_SIZE	16	/* # of slots in per PCB event ring */
68 
69 #define DPRINTF(x...) /* nothing */
70 
/*
 * Descriptor associated with each program opening /dev/dt.  It is used
 * to keep track of enabled PCBs.
 *
 *  Locks used to protect struct members in this file:
 *	m	per-softc mutex
 *	K	kernel lock
 *	I	invariant after initialization, no lock needed
 */
struct dt_softc {
	SLIST_ENTRY(dt_softc)	 ds_next;	/* [K] descriptor list */
	int			 ds_unit;	/* [I] D_CLONE unique unit */
	pid_t			 ds_pid;	/* [I] PID of tracing program */

	struct mutex		 ds_mtx;	/* mutex protecting [m] members */

	struct dt_pcb_list	 ds_pcbs;	/* [K] list of enabled PCBs */
	struct dt_evt		*ds_bufqueue;	/* [K] copy evts to userland */
	size_t			 ds_bufqlen;	/* [K] length of the queue */
	int			 ds_recording;	/* [K] currently recording? */
	int			 ds_evtcnt;	/* [m] # of readable evts */

	/* Counters */
	uint64_t		 ds_readevt;	/* [m] # of events read */
	uint64_t		 ds_dropevt;	/* [m] # of events dropped */
};
96 
97 SLIST_HEAD(, dt_softc) dtdev_list;	/* [K] list of open /dev/dt nodes */
98 
99 /*
100  * Probes are created during dt_attach() and never modified/freed during
101  * the lifetime of the system.  That's why we consider them as [I]mmutable.
102  */
103 unsigned int			dt_nprobes;	/* [I] # of probes available */
104 SIMPLEQ_HEAD(, dt_probe)	dt_probe_list;	/* [I] list of probes */
105 
106 struct rwlock			dt_lock = RWLOCK_INITIALIZER("dtlk");
107 volatile uint32_t		dt_tracing = 0;	/* [K] # of processes tracing */
108 
109 void	dtattach(struct device *, struct device *, void *);
110 int	dtopen(dev_t, int, int, struct proc *);
111 int	dtclose(dev_t, int, int, struct proc *);
112 int	dtread(dev_t, struct uio *, int);
113 int	dtioctl(dev_t, u_long, caddr_t, int, struct proc *);
114 
115 struct	dt_softc *dtlookup(int);
116 
117 int	dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
118 int	dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
119 int	dt_ioctl_record_start(struct dt_softc *);
120 void	dt_ioctl_record_stop(struct dt_softc *);
121 int	dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
122 void	dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
123 
124 int	dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *);
125 
126 void
127 dtattach(struct device *parent, struct device *self, void *aux)
128 {
129 	SLIST_INIT(&dtdev_list);
130 	SIMPLEQ_INIT(&dt_probe_list);
131 
132 	/* Init providers */
133 	dt_nprobes += dt_prov_profile_init();
134 	dt_nprobes += dt_prov_syscall_init();
135 	dt_nprobes += dt_prov_static_init();
136 
137 	printf("dt: %u probes\n", dt_nprobes);
138 }
139 
140 int
141 dtopen(dev_t dev, int flags, int mode, struct proc *p)
142 {
143 	struct dt_softc *sc;
144 	int unit = minor(dev);
145 	extern int allowdt;
146 
147 	if (!allowdt)
148 		return EPERM;
149 
150 	KASSERT(dtlookup(unit) == NULL);
151 
152 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
153 	if (sc == NULL)
154 		return ENOMEM;
155 
156 	/*
157 	 * Enough space to empty 2 full rings of events in a single read.
158 	 */
159 	sc->ds_bufqlen = 2 * DT_EVTRING_SIZE;
160 	sc->ds_bufqueue = mallocarray(sc->ds_bufqlen, sizeof(*sc->ds_bufqueue),
161 	    M_DEVBUF, M_WAITOK|M_CANFAIL);
162 	if (sc->ds_bufqueue == NULL)
163 		goto bad;
164 
165 	sc->ds_unit = unit;
166 	sc->ds_pid = p->p_p->ps_pid;
167 	TAILQ_INIT(&sc->ds_pcbs);
168 	mtx_init(&sc->ds_mtx, IPL_HIGH);
169 	sc->ds_evtcnt = 0;
170 	sc->ds_readevt = 0;
171 	sc->ds_dropevt = 0;
172 
173 	SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);
174 
175 	DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);
176 
177 	return 0;
178 
179 bad:
180 	free(sc, M_DEVBUF, sizeof(*sc));
181 	return ENOMEM;
182 }
183 
184 int
185 dtclose(dev_t dev, int flags, int mode, struct proc *p)
186 {
187 	struct dt_softc *sc;
188 	int unit = minor(dev);
189 
190 	sc = dtlookup(unit);
191 	KASSERT(sc != NULL);
192 
193 	DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);
194 
195 	SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
196 	dt_ioctl_record_stop(sc);
197 	dt_pcb_purge(&sc->ds_pcbs);
198 
199 	free(sc->ds_bufqueue, M_DEVBUF,
200 	    sc->ds_bufqlen * sizeof(*sc->ds_bufqueue));
201 	free(sc, M_DEVBUF, sizeof(*sc));
202 
203 	return 0;
204 }
205 
206 int
207 dtread(dev_t dev, struct uio *uio, int flags)
208 {
209 	struct sleep_state sls;
210 	struct dt_softc *sc;
211 	struct dt_evt *estq;
212 	struct dt_pcb *dp;
213 	int error, unit = minor(dev);
214 	size_t qlen, count, read = 0;
215 	uint64_t dropped = 0;
216 
217 	sc = dtlookup(unit);
218 	KASSERT(sc != NULL);
219 
220 	count = howmany(uio->uio_resid, sizeof(struct dt_evt));
221 	if (count < 1)
222 		return (EMSGSIZE);
223 
224 	while (!sc->ds_evtcnt) {
225 		sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread");
226 		sleep_setup_signal(&sls);
227 		sleep_finish(&sls, !sc->ds_evtcnt);
228 		error = sleep_finish_signal(&sls);
229 		if (error == EINTR || error == ERESTART)
230 			break;
231 	}
232 	if (error)
233 		return error;
234 
235 	estq = sc->ds_bufqueue;
236 	qlen = MIN(sc->ds_bufqlen, count);
237 
238 	KERNEL_ASSERT_LOCKED();
239 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
240 		count = dt_pcb_ring_copy(dp, estq, qlen, &dropped);
241 		read += count;
242 		estq += count; /* pointer aritmetic */
243 		qlen -= count;
244 		if (qlen == 0)
245 			break;
246 	}
247 	if (read > 0)
248 		uiomove(sc->ds_bufqueue, read * sizeof(struct dt_evt), uio);
249 
250 	mtx_enter(&sc->ds_mtx);
251 	sc->ds_evtcnt -= read;
252 	sc->ds_readevt += read;
253 	sc->ds_dropevt += dropped;
254 	mtx_leave(&sc->ds_mtx);
255 
256 	return 0;
257 }
258 
259 int
260 dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
261 {
262 	struct dt_softc *sc;
263 	int unit = minor(dev);
264 	int on, error = 0;
265 
266 	sc = dtlookup(unit);
267 	KASSERT(sc != NULL);
268 
269 	switch (cmd) {
270 	case DTIOCGPLIST:
271 		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
272 	case DTIOCGSTATS:
273 		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
274 	case DTIOCRECORD:
275 	case DTIOCPRBENABLE:
276 		/* root only ioctl(2) */
277 		break;
278 	default:
279 		return ENOTTY;
280 	}
281 
282 	if ((error = suser(p)) != 0)
283 		return error;
284 
285 	switch (cmd) {
286 	case DTIOCRECORD:
287 		on = *(int *)addr;
288 		if (on)
289 			error = dt_ioctl_record_start(sc);
290 		else
291 			dt_ioctl_record_stop(sc);
292 		break;
293 	case DTIOCPRBENABLE:
294 		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
295 		break;
296 	default:
297 		KASSERT(0);
298 	}
299 
300 	return error;
301 }
302 
303 struct dt_softc *
304 dtlookup(int unit)
305 {
306 	struct dt_softc *sc;
307 
308 	KERNEL_ASSERT_LOCKED();
309 
310 	SLIST_FOREACH(sc, &dtdev_list, ds_next) {
311 		if (sc->ds_unit == unit)
312 			break;
313 	}
314 
315 	return sc;
316 }
317 
318 int
319 dtioc_req_isvalid(struct dtioc_req *dtrq)
320 {
321 	switch (dtrq->dtrq_filter.dtf_operand) {
322 	case DT_OP_NONE:
323 	case DT_OP_EQ:
324 	case DT_OP_NE:
325 		break;
326 	default:
327 		return 0;
328 	}
329 
330 	switch (dtrq->dtrq_filter.dtf_variable) {
331 	case DT_FV_NONE:
332 	case DT_FV_PID:
333 	case DT_FV_TID:
334 		break;
335 	default:
336 		return 0;
337 	}
338 
339 	return 1;
340 }
341 
342 int
343 dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
344 {
345 	struct dtioc_probe_info info, *dtpi;
346 	struct dt_probe *dtp;
347 	size_t size;
348 	int error = 0;
349 
350 	if (dtpr->dtpr_size == 0) {
351 		dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi);
352 		return 0;
353 	}
354 
355 	size = dtpr->dtpr_size;
356 	dtpi = dtpr->dtpr_probes;
357 	memset(&info, 0, sizeof(info));
358 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
359 		if (size < sizeof(*dtpi)) {
360 			error = ENOSPC;
361 			break;
362 		}
363 		info.dtpi_pbn = dtp->dtp_pbn;
364 		info.dtpi_nargs = dtp->dtp_nargs;
365 		strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
366 		    sizeof(info.dtpi_prov));
367 		strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
368 		strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
369 		error = copyout(&info, dtpi, sizeof(*dtpi));
370 		if (error)
371 			break;
372 		size -= sizeof(*dtpi);
373 		dtpi++;
374 	};
375 
376 	return error;
377 }
378 
379 int
380 dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
381 {
382 	mtx_enter(&sc->ds_mtx);
383 	dtst->dtst_readevt = sc->ds_readevt;
384 	dtst->dtst_dropevt = sc->ds_dropevt;
385 	mtx_leave(&sc->ds_mtx);
386 
387 	return 0;
388 }
389 
/*
 * Start recording: link every PCB of this descriptor to its probe
 * so the probe sites begin delivering events.
 *
 * Returns EBUSY if already recording, ENOENT if no probe has been
 * enabled yet, 0 on success.
 */
int
dt_ioctl_record_start(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	if (sc->ds_recording)
		return EBUSY;

	KERNEL_ASSERT_LOCKED();
 	if (TAILQ_EMPTY(&sc->ds_pcbs))
		return ENOENT;

	/*
	 * Publish the PCBs on their probes' SMR lists under the write
	 * lock and bump the per-probe/per-provider recording counts.
	 */
	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
		dtp->dtp_recording++;
		dtp->dtp_prov->dtpv_recording++;
	}
	rw_exit_write(&dt_lock);

	/* Flag the descriptor and the global tracing count last. */
	sc->ds_recording = 1;
	dt_tracing++;

	return 0;
}
417 
/*
 * Stop recording: unlink every PCB of this descriptor from its
 * probe and wait for concurrent readers to finish.  No-op if the
 * descriptor is not currently recording.
 */
void
dt_ioctl_record_stop(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	KASSERT(suser(curproc) == 0);

	if (!sc->ds_recording)
		return;

	DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);

	/* Clear the flags before unlinking the PCBs. */
	dt_tracing--;
	sc->ds_recording = 0;

	/* Remove the PCBs from the probes' SMR lists under the write lock. */
	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		dtp->dtp_recording--;
		dtp->dtp_prov->dtpv_recording--;
		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
	}
	rw_exit_write(&dt_lock);

	/* Wait until readers cannot access the PCBs. */
	smr_barrier();
}
446 
447 int
448 dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
449 {
450 	struct dt_pcb_list plist;
451 	struct dt_probe *dtp;
452 	int error;
453 
454 	KASSERT(suser(curproc) == 0);
455 
456 	if (!dtioc_req_isvalid(dtrq))
457 		return EINVAL;
458 
459 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
460 		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
461 			break;
462 	}
463 	if (dtp == NULL)
464 		return ENOENT;
465 
466 	TAILQ_INIT(&plist);
467 	error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
468 	if (error)
469 		return error;
470 
471 	DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
472 	    dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);
473 
474 	/* Append all PCBs to this instance */
475 	TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext);
476 
477 	return 0;
478 }
479 
480 struct dt_probe *
481 dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
482 {
483 	struct dt_probe *dtp;
484 
485 	dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
486 	if (dtp == NULL)
487 		return NULL;
488 
489 	SMR_SLIST_INIT(&dtp->dtp_pcbs);
490 	dtp->dtp_prov = dtpv;
491 	dtp->dtp_func = func;
492 	dtp->dtp_name = name;
493 	dtp->dtp_sysnum = -1;
494 
495 	return dtp;
496 }
497 
498 void
499 dt_dev_register_probe(struct dt_probe *dtp)
500 {
501 	static uint64_t probe_nb;
502 
503 	dtp->dtp_pbn = ++probe_nb;
504 	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
505 }
506 
507 struct dt_pcb *
508 dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
509 {
510 	struct dt_pcb *dp;
511 
512 	dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
513 	if (dp == NULL)
514 		goto bad;
515 
516 	dp->dp_ring = mallocarray(DT_EVTRING_SIZE, sizeof(*dp->dp_ring), M_DT,
517 	    M_WAITOK|M_CANFAIL|M_ZERO);
518 	if (dp->dp_ring == NULL)
519 		goto bad;
520 
521 	mtx_init(&dp->dp_mtx, IPL_HIGH);
522 	dp->dp_sc = sc;
523 	dp->dp_dtp = dtp;
524 	return dp;
525 bad:
526 	dt_pcb_free(dp);
527 	return NULL;
528 }
529 
530 void
531 dt_pcb_free(struct dt_pcb *dp)
532 {
533 	if (dp == NULL)
534 		return;
535 	free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring));
536 	free(dp, M_DT, sizeof(*dp));
537 }
538 
539 void
540 dt_pcb_purge(struct dt_pcb_list *plist)
541 {
542 	struct dt_pcb *dp;
543 
544 	while ((dp = TAILQ_FIRST(plist)) != NULL) {
545 		TAILQ_REMOVE(plist, dp, dp_snext);
546 		dt_pcb_free(dp);
547 	}
548 }
549 
550 int
551 dt_pcb_filter(struct dt_pcb *dp)
552 {
553 	struct dt_filter *dtf = &dp->dp_filter;
554 	struct proc *p = curproc;
555 	unsigned int var;
556 	int match = 1;
557 
558 	/* Filter out tracing program. */
559 	if (dp->dp_sc->ds_pid == p->p_p->ps_pid)
560 		return 1;
561 
562 	switch (dtf->dtf_variable) {
563 	case DT_FV_PID:
564 		var = p->p_p->ps_pid;
565 		break;
566 	case DT_FV_TID:
567 		var = p->p_tid;
568 		break;
569 	case DT_FV_NONE:
570 		break;
571 	default:
572 		KASSERT(0);
573 	}
574 
575 	switch (dtf->dtf_operand) {
576 	case DT_OP_EQ:
577 		match = !!(var == dtf->dtf_value);
578 		break;
579 	case DT_OP_NE:
580 		match = !!(var != dtf->dtf_value);
581 		break;
582 	case DT_OP_NONE:
583 		break;
584 	default:
585 		KASSERT(0);
586 	}
587 
588 	return !match;
589 }
590 
591 
/*
 * Get a reference to the next free event state from the ring.
 *
 * Returns NULL when the PCB's filter rejects the current thread or
 * when the ring is full (the event is then accounted as dropped).
 * On success dp->dp_mtx is left held; the caller must publish the
 * slot with dt_pcb_ring_consume(), which releases the mutex.
 *
 * NOTE(review): in this ring dp_cons is the index being written and
 * dp_prod the index being read by dt_pcb_ring_copy() — the naming
 * is inverted with respect to the usual producer/consumer
 * convention.
 */
struct dt_evt *
dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
{
	struct proc *p = curproc;
	struct dt_evt *dtev;
	int distance;

	if (dt_pcb_filter(dp))
		return NULL;

	mtx_enter(&dp->dp_mtx);
	/*
	 * One slot is always kept free so a full ring can be told
	 * apart from an empty one; `1 - DT_EVTRING_SIZE' is the same
	 * condition after the write index has wrapped around.
	 */
	distance = dp->dp_prod - dp->dp_cons;
	if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
		/* read(2) isn't finished */
		dp->dp_dropevt++;
		mtx_leave(&dp->dp_mtx);
		return NULL;
	}

	/*
	 * Save states in next free event slot.
	 */
	dtev = &dp->dp_ring[dp->dp_cons];
	memset(dtev, 0, sizeof(*dtev));

	dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
	dtev->dtev_cpu = cpu_number();
	dtev->dtev_pid = p->p_p->ps_pid;
	dtev->dtev_tid = p->p_tid;
	nanotime(&dtev->dtev_tsp);

	/* Slot was zeroed above, so the name stays NUL-terminated. */
	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
		memcpy(dtev->dtev_comm, p->p_p->ps_comm, DTMAXCOMLEN - 1);

	if (ISSET(dp->dp_evtflags, DTEVT_KSTACK|DTEVT_USTACK)) {
		/* Skip the frames of the tracing machinery itself. */
		if (profiling)
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
		else
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
	}

	return dtev;
}
638 
/*
 * Publish the event slot returned by dt_pcb_ring_get() and wake up
 * any thread sleeping in dtread().  Must be called with dp->dp_mtx
 * held (taken by dt_pcb_ring_get()); releases it.
 */
void
dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
{
	MUTEX_ASSERT_LOCKED(&dp->dp_mtx);
	KASSERT(dtev == &dp->dp_ring[dp->dp_cons]);

	/* Advance the write index (dp_cons is the producer side here). */
	dp->dp_cons = (dp->dp_cons + 1) % DT_EVTRING_SIZE;
	mtx_leave(&dp->dp_mtx);

	/* Account the new readable event and notify the reader. */
	mtx_enter(&dp->dp_sc->ds_mtx);
	dp->dp_sc->ds_evtcnt++;
	mtx_leave(&dp->dp_sc->ds_mtx);
	wakeup(dp->dp_sc);
}
653 
/*
 * Copy at most `qlen' events from `dp', producing the same amount
 * of free slots.
 *
 * Readable events occupy [dp_prod, dp_cons) and may wrap around the
 * end of the ring, so up to two contiguous chunks are copied.  The
 * number of events copied is returned and `*dropped' is increased
 * by the PCB's drop count, which is reset.
 */
int
dt_pcb_ring_copy(struct dt_pcb *dp, struct dt_evt *estq, size_t qlen,
    uint64_t *dropped)
{
	size_t count, copied = 0;
	unsigned int cons, prod;

	KASSERT(qlen > 0);

	mtx_enter(&dp->dp_mtx);
	cons = dp->dp_cons;
	prod = dp->dp_prod;

	/* Length of the first contiguous chunk, up to the end of the array. */
	if (cons < prod)
		count = DT_EVTRING_SIZE - prod;
	else
		count = cons - prod;

	if (count == 0)
		goto out;

	/* Hand the drop count over to the caller and reset it. */
	*dropped += dp->dp_dropevt;
	dp->dp_dropevt = 0;

	count = MIN(count, qlen);

	memcpy(&estq[0], &dp->dp_ring[prod], count * sizeof(*estq));
	copied += count;

	/* Produce */
	prod = (prod + count) % DT_EVTRING_SIZE;

	/* If the queue is full or the ring didn't wrap, stop here. */
	if (qlen == copied || prod != 0 || cons == 0)
		goto out;

	/* Second chunk: the wrapped-around events at the ring's start. */
	count = MIN(cons, (qlen - copied));
	memcpy(&estq[copied], &dp->dp_ring[0], count * sizeof(*estq));
	copied += count;
	prod += count;

out:
	dp->dp_prod = prod;
	mtx_leave(&dp->dp_mtx);
	return copied;
}
704