xref: /freebsd/sys/security/audit/audit_pipe.c (revision 7bd6fde3)
1 /*-
2  * Copyright (c) 2006 Robert N. M. Watson
3  * All rights reserved.
4  *
5  * This software was developed by Robert Watson for the TrustedBSD Project.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/conf.h>
34 #include <sys/eventhandler.h>
35 #include <sys/filio.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/poll.h>
41 #include <sys/proc.h>
42 #include <sys/queue.h>
43 #include <sys/selinfo.h>
44 #include <sys/sigio.h>
45 #include <sys/signal.h>
46 #include <sys/signalvar.h>
47 #include <sys/systm.h>
48 #include <sys/uio.h>
49 
50 #include <security/audit/audit.h>
51 #include <security/audit/audit_ioctl.h>
52 #include <security/audit/audit_private.h>
53 
54 /*
55  * Implementation of a clonable special device providing a live stream of BSM
56  * audit data.  This is a "tee" of the data going to the file.  It provides
57  * unreliable but timely access to audit events.  Consumers of this interface
58  * should be very careful to avoid introducing event cycles.  Consumers may
59  * express interest via a set of preselection ioctls.
60  */
61 
62 /*
63  * Memory types.
64  */
65 static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
66 static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
67     "Audit pipe entries and buffers");
68 static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_preselect",
69     "Audit pipe preselection structure");
70 
/*
 * Audit pipe queue limit parameters: the limit assigned to a newly
 * allocated pipe, and the bounds reported by the QLIMIT_MIN/QLIMIT_MAX
 * ioctls and used to validate AUDITPIPE_SET_QLIMIT requests.
 */
#define	AUDIT_PIPE_QLIMIT_DEFAULT	128
#define	AUDIT_PIPE_QLIMIT_MIN		0
#define	AUDIT_PIPE_QLIMIT_MAX		1024
77 
78 /*
79  * Description of an entry in an audit_pipe.
80  */
81 struct audit_pipe_entry {
82 	void				*ape_record;
83 	u_int				 ape_record_len;
84 	TAILQ_ENTRY(audit_pipe_entry)	 ape_queue;
85 };
86 
87 /*
88  * Audit pipes allow processes to express "interest" in the set of records
89  * that are delivered via the pipe.  They do this in a similar manner to the
90  * mechanism for audit trail configuration, by expressing two global masks,
91  * and optionally expressing per-auid masks.  The following data structure is
92  * the per-auid mask description.  The global state is stored in the audit
93  * pipe data structure.
94  *
95  * We may want to consider a more space/time-efficient data structure once
96  * usage patterns for per-auid specifications are clear.
97  */
98 struct audit_pipe_preselect {
99 	au_id_t					 app_auid;
100 	au_mask_t				 app_mask;
101 	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;
102 };
103 
104 /*
105  * Description of an individual audit_pipe.  Consists largely of a bounded
106  * length queue.
107  */
108 #define	AUDIT_PIPE_ASYNC	0x00000001
109 #define	AUDIT_PIPE_NBIO		0x00000002
110 struct audit_pipe {
111 	int				 ap_open;	/* Device open? */
112 	u_int				 ap_flags;
113 
114 	struct selinfo			 ap_selinfo;
115 	struct sigio			*ap_sigio;
116 
117 	u_int				 ap_qlen;
118 	u_int				 ap_qlimit;
119 
120 	u_int64_t			 ap_inserts;	/* Records added. */
121 	u_int64_t			 ap_reads;	/* Records read. */
122 	u_int64_t			 ap_drops;	/* Records dropped. */
123 	u_int64_t			 ap_truncates;	/* Records too long. */
124 
125 	/*
126 	 * Fields relating to pipe interest: global masks for unmatched
127 	 * processes (attributable, non-attributable), and a list of specific
128 	 * interest specifications by auid.
129 	 */
130 	int				 ap_preselect_mode;
131 	au_mask_t			 ap_preselect_flags;
132 	au_mask_t			 ap_preselect_naflags;
133 	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;
134 
135 	/*
136 	 * Current pending record list.
137 	 */
138 	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;
139 
140 	/*
141 	 * Global pipe list.
142 	 */
143 	TAILQ_ENTRY(audit_pipe)		 ap_list;
144 };
145 
146 /*
147  * Global list of audit pipes, mutex to protect it and the pipes.  Finer
148  * grained locking may be desirable at some point.
149  */
150 static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
151 static struct mtx		 audit_pipe_mtx;
152 
153 /*
154  * This CV is used to wakeup on an audit record write.  Eventually, it might
155  * be per-pipe to avoid unnecessary wakeups when several pipes with different
156  * preselection masks are present.
157  */
158 static struct cv		 audit_pipe_cv;
159 
160 /*
161  * Cloning related variables and constants.
162  */
163 #define	AUDIT_PIPE_NAME		"auditpipe"
164 static eventhandler_tag		 audit_pipe_eh_tag;
165 static struct clonedevs		*audit_pipe_clones;
166 
167 /*
168  * Special device methods and definition.
169  */
170 static d_open_t		audit_pipe_open;
171 static d_close_t	audit_pipe_close;
172 static d_read_t		audit_pipe_read;
173 static d_ioctl_t	audit_pipe_ioctl;
174 static d_poll_t		audit_pipe_poll;
175 static d_kqfilter_t	audit_pipe_kqfilter;
176 
177 static struct cdevsw	audit_pipe_cdevsw = {
178 	.d_version =	D_VERSION,
179 	.d_flags =	D_PSEUDO | D_NEEDGIANT,
180 	.d_open =	audit_pipe_open,
181 	.d_close =	audit_pipe_close,
182 	.d_read =	audit_pipe_read,
183 	.d_ioctl =	audit_pipe_ioctl,
184 	.d_poll =	audit_pipe_poll,
185 	.d_kqfilter =	audit_pipe_kqfilter,
186 	.d_name =	AUDIT_PIPE_NAME,
187 };
188 
189 static int	audit_pipe_kqread(struct knote *note, long hint);
190 static void	audit_pipe_kqdetach(struct knote *note);
191 
192 static struct filterops audit_pipe_read_filterops = {
193 	.f_isfd =	1,
194 	.f_attach =	NULL,
195 	.f_detach =	audit_pipe_kqdetach,
196 	.f_event =	audit_pipe_kqread,
197 };
198 
199 /*
200  * Some global statistics on audit pipes.
201  */
202 static int		audit_pipe_count;	/* Current number of pipes. */
203 static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
204 static u_int64_t	audit_pipe_records;	/* Records seen. */
205 static u_int64_t	audit_pipe_drops;	/* Global record drop count. */
206 
207 /*
208  * Free an audit pipe entry.
209  */
210 static void
211 audit_pipe_entry_free(struct audit_pipe_entry *ape)
212 {
213 
214 	free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
215 	free(ape, M_AUDIT_PIPE_ENTRY);
216 }
217 
218 /*
219  * Find an audit pipe preselection specification for an auid, if any.
220  */
221 static struct audit_pipe_preselect *
222 audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
223 {
224 	struct audit_pipe_preselect *app;
225 
226 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
227 
228 	TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
229 		if (app->app_auid == auid)
230 			return (app);
231 	}
232 	return (NULL);
233 }
234 
235 /*
236  * Query the per-pipe mask for a specific auid.
237  */
238 static int
239 audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
240     au_mask_t *maskp)
241 {
242 	struct audit_pipe_preselect *app;
243 	int error;
244 
245 	mtx_lock(&audit_pipe_mtx);
246 	app = audit_pipe_preselect_find(ap, auid);
247 	if (app != NULL) {
248 		*maskp = app->app_mask;
249 		error = 0;
250 	} else
251 		error = ENOENT;
252 	mtx_unlock(&audit_pipe_mtx);
253 	return (error);
254 }
255 
256 /*
257  * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
258  * otherwise, update the current entry.
259  */
260 static void
261 audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
262 {
263 	struct audit_pipe_preselect *app, *app_new;
264 
265 	/*
266 	 * Pessimistically assume that the auid doesn't already have a mask
267 	 * set, and allocate.  We will free it if it is unneeded.
268 	 */
269 	app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
270 	mtx_lock(&audit_pipe_mtx);
271 	app = audit_pipe_preselect_find(ap, auid);
272 	if (app == NULL) {
273 		app = app_new;
274 		app_new = NULL;
275 		app->app_auid = auid;
276 		TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
277 	}
278 	app->app_mask = mask;
279 	mtx_unlock(&audit_pipe_mtx);
280 	if (app_new != NULL)
281 		free(app_new, M_AUDIT_PIPE_PRESELECT);
282 }
283 
284 /*
285  * Delete a per-auid mask on an audit pipe.
286  */
287 static int
288 audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
289 {
290 	struct audit_pipe_preselect *app;
291 	int error;
292 
293 	mtx_lock(&audit_pipe_mtx);
294 	app = audit_pipe_preselect_find(ap, auid);
295 	if (app != NULL) {
296 		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
297 		error = 0;
298 	} else
299 		error = ENOENT;
300 	mtx_unlock(&audit_pipe_mtx);
301 	if (app != NULL)
302 		free(app, M_AUDIT_PIPE_PRESELECT);
303 	return (error);
304 }
305 
306 /*
307  * Delete all per-auid masks on an audit pipe.
308  */
309 static void
310 audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
311 {
312 	struct audit_pipe_preselect *app;
313 
314 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
315 
316 	while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
317 		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
318 		free(app, M_AUDIT_PIPE_PRESELECT);
319 	}
320 }
321 
322 static void
323 audit_pipe_preselect_flush(struct audit_pipe *ap)
324 {
325 
326 	mtx_lock(&audit_pipe_mtx);
327 	audit_pipe_preselect_flush_locked(ap);
328 	mtx_unlock(&audit_pipe_mtx);
329 }
330 
331 /*
332  * Determine whether a specific audit pipe matches a record with these
333  * properties.  Algorithm is as follows:
334  *
335  * - If the pipe is configured to track the default trail configuration, then
336  *   use the results of global preselection matching.
337  * - If not, search for a specifically configured auid entry matching the
338  *   event.  If an entry is found, use that.
339  * - Otherwise, use the default flags or naflags configured for the pipe.
340  */
341 static int
342 audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
343     au_event_t event, au_class_t class, int sorf, int trail_preselect)
344 {
345 	struct audit_pipe_preselect *app;
346 
347 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
348 
349 	switch (ap->ap_preselect_mode) {
350 	case AUDITPIPE_PRESELECT_MODE_TRAIL:
351 		return (trail_preselect);
352 
353 	case AUDITPIPE_PRESELECT_MODE_LOCAL:
354 		app = audit_pipe_preselect_find(ap, auid);
355 		if (app == NULL) {
356 			if (auid == AU_DEFAUDITID)
357 				return (au_preselect(event, class,
358 				    &ap->ap_preselect_naflags, sorf));
359 			else
360 				return (au_preselect(event, class,
361 				    &ap->ap_preselect_flags, sorf));
362 		} else
363 			return (au_preselect(event, class, &app->app_mask,
364 			    sorf));
365 
366 	default:
367 		panic("audit_pipe_preselect_check: mode %d",
368 		    ap->ap_preselect_mode);
369 	}
370 
371 	return (0);
372 }
373 
374 /*
375  * Determine whether there exists a pipe interested in a record with specific
376  * properties.
377  */
378 int
379 audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
380     int sorf, int trail_preselect)
381 {
382 	struct audit_pipe *ap;
383 
384 	mtx_lock(&audit_pipe_mtx);
385 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
386 		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
387 		    trail_preselect)) {
388 			mtx_unlock(&audit_pipe_mtx);
389 			return (1);
390 		}
391 	}
392 	mtx_unlock(&audit_pipe_mtx);
393 	return (0);
394 }
395 
396 /*
397  * Append individual record to a queue -- allocate queue-local buffer, and
398  * add to the queue.  We try to drop from the head of the queue so that more
399  * recent events take precedence over older ones, but if allocation fails we
400  * do drop the new event.
401  */
402 static void
403 audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
404 {
405 	struct audit_pipe_entry *ape, *ape_remove;
406 
407 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
408 
409 	ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
410 	if (ape == NULL) {
411 		ap->ap_drops++;
412 		audit_pipe_drops++;
413 		return;
414 	}
415 
416 	ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
417 	if (ape->ape_record == NULL) {
418 		free(ape, M_AUDIT_PIPE_ENTRY);
419 		ap->ap_drops++;
420 		audit_pipe_drops++;
421 		return;
422 	}
423 
424 	bcopy(record, ape->ape_record, record_len);
425 	ape->ape_record_len = record_len;
426 
427 	if (ap->ap_qlen >= ap->ap_qlimit) {
428 		ape_remove = TAILQ_FIRST(&ap->ap_queue);
429 		TAILQ_REMOVE(&ap->ap_queue, ape_remove, ape_queue);
430 		audit_pipe_entry_free(ape_remove);
431 		ap->ap_qlen--;
432 		ap->ap_drops++;
433 		audit_pipe_drops++;
434 	}
435 
436 	TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
437 	ap->ap_inserts++;
438 	ap->ap_qlen++;
439 	selwakeuppri(&ap->ap_selinfo, PSOCK);
440 	KNOTE_LOCKED(&ap->ap_selinfo.si_note, 0);
441 	if (ap->ap_flags & AUDIT_PIPE_ASYNC)
442 		pgsigio(&ap->ap_sigio, SIGIO, 0);
443 }
444 
445 /*
446  * audit_pipe_submit(): audit_worker submits audit records via this
447  * interface, which arranges for them to be delivered to pipe queues.
448  */
449 void
450 audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
451     int trail_select, void *record, u_int record_len)
452 {
453 	struct audit_pipe *ap;
454 
455 	/*
456 	 * Lockless read to avoid mutex overhead if pipes are not in use.
457 	 */
458 	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
459 		return;
460 
461 	mtx_lock(&audit_pipe_mtx);
462 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
463 		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
464 		    trail_select))
465 			audit_pipe_append(ap, record, record_len);
466 	}
467 	audit_pipe_records++;
468 	mtx_unlock(&audit_pipe_mtx);
469 	cv_signal(&audit_pipe_cv);
470 }
471 
472 /*
473  * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
474  * since we don't currently have selection information available, it is
475  * delivered to the pipe unconditionally.
476  *
477  * XXXRW: This is a bug.  The BSM check routine for submitting a user record
478  * should parse that information and return it.
479  */
480 void
481 audit_pipe_submit_user(void *record, u_int record_len)
482 {
483 	struct audit_pipe *ap;
484 
485 	/*
486 	 * Lockless read to avoid mutex overhead if pipes are not in use.
487 	 */
488 	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
489 		return;
490 
491 	mtx_lock(&audit_pipe_mtx);
492 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list)
493 		audit_pipe_append(ap, record, record_len);
494 	audit_pipe_records++;
495 	mtx_unlock(&audit_pipe_mtx);
496 	cv_signal(&audit_pipe_cv);
497 }
498 
499 
500 /*
501  * Pop the next record off of an audit pipe.
502  */
503 static struct audit_pipe_entry *
504 audit_pipe_pop(struct audit_pipe *ap)
505 {
506 	struct audit_pipe_entry *ape;
507 
508 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
509 
510 	ape = TAILQ_FIRST(&ap->ap_queue);
511 	KASSERT((ape == NULL && ap->ap_qlen == 0) ||
512 	    (ape != NULL && ap->ap_qlen != 0), ("audit_pipe_pop: qlen"));
513 	if (ape == NULL)
514 		return (NULL);
515 	TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
516 	ap->ap_qlen--;
517 	return (ape);
518 }
519 
520 /*
521  * Allocate a new audit pipe.  Connects the pipe, on success, to the global
522  * list and updates statistics.
523  */
524 static struct audit_pipe *
525 audit_pipe_alloc(void)
526 {
527 	struct audit_pipe *ap;
528 
529 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
530 
531 	ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
532 	if (ap == NULL)
533 		return (NULL);
534 	ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
535 	TAILQ_INIT(&ap->ap_queue);
536 	knlist_init(&ap->ap_selinfo.si_note, &audit_pipe_mtx, NULL, NULL,
537 	    NULL);
538 
539 	/*
540 	 * Default flags, naflags, and auid-specific preselection settings to
541 	 * 0.  Initialize the mode to the global trail so that if praudit(1)
542 	 * is run on /dev/auditpipe, it sees events associated with the
543 	 * default trail.  Pipe-aware application can clear the flag, set
544 	 * custom masks, and flush the pipe as needed.
545 	 */
546 	bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
547 	bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
548 	TAILQ_INIT(&ap->ap_preselect_list);
549 	ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;
550 
551 	/*
552 	 * Add to global list and update global statistics.
553 	 */
554 	TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
555 	audit_pipe_count++;
556 	audit_pipe_ever++;
557 
558 	return (ap);
559 }
560 
561 /*
562  * Flush all records currently present in an audit pipe; assume mutex is held.
563  */
564 static void
565 audit_pipe_flush(struct audit_pipe *ap)
566 {
567 	struct audit_pipe_entry *ape;
568 
569 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
570 
571 	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
572 		TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
573 		audit_pipe_entry_free(ape);
574 		ap->ap_qlen--;
575 	}
576 	KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qlen"));
577 }
578 
579 /*
580  * Free an audit pipe; this means freeing all preselection state and all
581  * records in the pipe.  Assumes mutex is held to prevent any new records
582  * from being inserted during the free, and that the audit pipe is still on
583  * the global list.
584  */
585 static void
586 audit_pipe_free(struct audit_pipe *ap)
587 {
588 
589 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
590 
591 	audit_pipe_preselect_flush_locked(ap);
592 	audit_pipe_flush(ap);
593 	knlist_destroy(&ap->ap_selinfo.si_note);
594 	TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
595 	free(ap, M_AUDIT_PIPE);
596 	audit_pipe_count--;
597 }
598 
599 /*
600  * Audit pipe clone routine -- provide specific requested audit pipe, or a
601  * fresh one if a specific one is not requested.
602  */
603 static void
604 audit_pipe_clone(void *arg, struct ucred *cred, char *name, int namelen,
605     struct cdev **dev)
606 {
607 	int i, u;
608 
609 	if (*dev != NULL)
610 		return;
611 
612 	if (strcmp(name, AUDIT_PIPE_NAME) == 0)
613 		u = -1;
614 	else if (dev_stdclone(name, NULL, AUDIT_PIPE_NAME, &u) != 1)
615 		return;
616 
617 	i = clone_create(&audit_pipe_clones, &audit_pipe_cdevsw, &u, dev, 0);
618 	if (i) {
619 		*dev = make_dev(&audit_pipe_cdevsw, unit2minor(u), UID_ROOT,
620 		    GID_WHEEL, 0600, "%s%d", AUDIT_PIPE_NAME, u);
621 		if (*dev != NULL) {
622 			dev_ref(*dev);
623 			(*dev)->si_flags |= SI_CHEAPCLONE;
624 		}
625 	}
626 }
627 
628 /*
629  * Audit pipe open method.  Explicit privilege check isn't used as this
630  * allows file permissions on the special device to be used to grant audit
631  * review access.  Those file permissions should be managed carefully.
632  */
633 static int
634 audit_pipe_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
635 {
636 	struct audit_pipe *ap;
637 
638 	mtx_lock(&audit_pipe_mtx);
639 	ap = dev->si_drv1;
640 	if (ap == NULL) {
641 		ap = audit_pipe_alloc();
642 		if (ap == NULL) {
643 			mtx_unlock(&audit_pipe_mtx);
644 			return (ENOMEM);
645 		}
646 		dev->si_drv1 = ap;
647 	} else {
648 		KASSERT(ap->ap_open, ("audit_pipe_open: ap && !ap_open"));
649 		mtx_unlock(&audit_pipe_mtx);
650 		return (EBUSY);
651 	}
652 	ap->ap_open = 1;
653 	mtx_unlock(&audit_pipe_mtx);
654 	fsetown(td->td_proc->p_pid, &ap->ap_sigio);
655 	return (0);
656 }
657 
658 /*
659  * Close audit pipe, tear down all records, etc.
660  */
661 static int
662 audit_pipe_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
663 {
664 	struct audit_pipe *ap;
665 
666 	ap = dev->si_drv1;
667 	KASSERT(ap != NULL, ("audit_pipe_close: ap == NULL"));
668 	KASSERT(ap->ap_open, ("audit_pipe_close: !ap_open"));
669 	funsetown(&ap->ap_sigio);
670 	mtx_lock(&audit_pipe_mtx);
671 	ap->ap_open = 0;
672 	audit_pipe_free(ap);
673 	dev->si_drv1 = NULL;
674 	mtx_unlock(&audit_pipe_mtx);
675 	return (0);
676 }
677 
678 /*
679  * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
680  * commands.
681  *
682  * Would be desirable to support filtering, although perhaps something simple
683  * like an event mask, as opposed to something complicated like BPF.
684  */
685 static int
686 audit_pipe_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
687     struct thread *td)
688 {
689 	struct auditpipe_ioctl_preselect *aip;
690 	struct audit_pipe *ap;
691 	au_mask_t *maskp;
692 	int error, mode;
693 	au_id_t auid;
694 
695 	ap = dev->si_drv1;
696 	KASSERT(ap != NULL, ("audit_pipe_ioctl: ap == NULL"));
697 
698 	/*
699 	 * Audit pipe ioctls: first come standard device node ioctls, then
700 	 * manipulation of pipe settings, and finally, statistics query
701 	 * ioctls.
702 	 */
703 	switch (cmd) {
704 	case FIONBIO:
705 		mtx_lock(&audit_pipe_mtx);
706 		if (*(int *)data)
707 			ap->ap_flags |= AUDIT_PIPE_NBIO;
708 		else
709 			ap->ap_flags &= ~AUDIT_PIPE_NBIO;
710 		mtx_unlock(&audit_pipe_mtx);
711 		error = 0;
712 		break;
713 
714 	case FIONREAD:
715 		mtx_lock(&audit_pipe_mtx);
716 		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
717 			*(int *)data =
718 			    TAILQ_FIRST(&ap->ap_queue)->ape_record_len;
719 		else
720 			*(int *)data = 0;
721 		mtx_unlock(&audit_pipe_mtx);
722 		error = 0;
723 		break;
724 
725 	case FIOASYNC:
726 		mtx_lock(&audit_pipe_mtx);
727 		if (*(int *)data)
728 			ap->ap_flags |= AUDIT_PIPE_ASYNC;
729 		else
730 			ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
731 		mtx_unlock(&audit_pipe_mtx);
732 		error = 0;
733 		break;
734 
735 	case FIOSETOWN:
736 		error = fsetown(*(int *)data, &ap->ap_sigio);
737 		break;
738 
739 	case FIOGETOWN:
740 		*(int *)data = fgetown(&ap->ap_sigio);
741 		error = 0;
742 		break;
743 
744 	case AUDITPIPE_GET_QLEN:
745 		*(u_int *)data = ap->ap_qlen;
746 		error = 0;
747 		break;
748 
749 	case AUDITPIPE_GET_QLIMIT:
750 		*(u_int *)data = ap->ap_qlimit;
751 		error = 0;
752 		break;
753 
754 	case AUDITPIPE_SET_QLIMIT:
755 		/* Lockless integer write. */
756 		if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
757 		    *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
758 			ap->ap_qlimit = *(u_int *)data;
759 			error = 0;
760 		} else
761 			error = EINVAL;
762 		break;
763 
764 	case AUDITPIPE_GET_QLIMIT_MIN:
765 		*(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
766 		error = 0;
767 		break;
768 
769 	case AUDITPIPE_GET_QLIMIT_MAX:
770 		*(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
771 		error = 0;
772 		break;
773 
774 	case AUDITPIPE_GET_PRESELECT_FLAGS:
775 		mtx_lock(&audit_pipe_mtx);
776 		maskp = (au_mask_t *)data;
777 		*maskp = ap->ap_preselect_flags;
778 		mtx_unlock(&audit_pipe_mtx);
779 		error = 0;
780 		break;
781 
782 	case AUDITPIPE_SET_PRESELECT_FLAGS:
783 		mtx_lock(&audit_pipe_mtx);
784 		maskp = (au_mask_t *)data;
785 		ap->ap_preselect_flags = *maskp;
786 		mtx_unlock(&audit_pipe_mtx);
787 		error = 0;
788 		break;
789 
790 	case AUDITPIPE_GET_PRESELECT_NAFLAGS:
791 		mtx_lock(&audit_pipe_mtx);
792 		maskp = (au_mask_t *)data;
793 		*maskp = ap->ap_preselect_naflags;
794 		mtx_unlock(&audit_pipe_mtx);
795 		error = 0;
796 		break;
797 
798 	case AUDITPIPE_SET_PRESELECT_NAFLAGS:
799 		mtx_lock(&audit_pipe_mtx);
800 		maskp = (au_mask_t *)data;
801 		ap->ap_preselect_naflags = *maskp;
802 		mtx_unlock(&audit_pipe_mtx);
803 		error = 0;
804 		break;
805 
806 	case AUDITPIPE_GET_PRESELECT_AUID:
807 		aip = (struct auditpipe_ioctl_preselect *)data;
808 		error = audit_pipe_preselect_get(ap, aip->aip_auid,
809 		    &aip->aip_mask);
810 		break;
811 
812 	case AUDITPIPE_SET_PRESELECT_AUID:
813 		aip = (struct auditpipe_ioctl_preselect *)data;
814 		audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
815 		error = 0;
816 		break;
817 
818 	case AUDITPIPE_DELETE_PRESELECT_AUID:
819 		auid = *(au_id_t *)data;
820 		error = audit_pipe_preselect_delete(ap, auid);
821 		break;
822 
823 	case AUDITPIPE_FLUSH_PRESELECT_AUID:
824 		audit_pipe_preselect_flush(ap);
825 		error = 0;
826 		break;
827 
828 	case AUDITPIPE_GET_PRESELECT_MODE:
829 		mtx_lock(&audit_pipe_mtx);
830 		*(int *)data = ap->ap_preselect_mode;
831 		mtx_unlock(&audit_pipe_mtx);
832 		error = 0;
833 		break;
834 
835 	case AUDITPIPE_SET_PRESELECT_MODE:
836 		mode = *(int *)data;
837 		switch (mode) {
838 		case AUDITPIPE_PRESELECT_MODE_TRAIL:
839 		case AUDITPIPE_PRESELECT_MODE_LOCAL:
840 			mtx_lock(&audit_pipe_mtx);
841 			ap->ap_preselect_mode = mode;
842 			mtx_unlock(&audit_pipe_mtx);
843 			error = 0;
844 			break;
845 
846 		default:
847 			error = EINVAL;
848 		}
849 		break;
850 
851 	case AUDITPIPE_FLUSH:
852 		mtx_lock(&audit_pipe_mtx);
853 		audit_pipe_flush(ap);
854 		mtx_unlock(&audit_pipe_mtx);
855 		error = 0;
856 		break;
857 
858 	case AUDITPIPE_GET_MAXAUDITDATA:
859 		*(u_int *)data = MAXAUDITDATA;
860 		error = 0;
861 		break;
862 
863 	case AUDITPIPE_GET_INSERTS:
864 		*(u_int *)data = ap->ap_inserts;
865 		error = 0;
866 		break;
867 
868 	case AUDITPIPE_GET_READS:
869 		*(u_int *)data = ap->ap_reads;
870 		error = 0;
871 		break;
872 
873 	case AUDITPIPE_GET_DROPS:
874 		*(u_int *)data = ap->ap_drops;
875 		error = 0;
876 		break;
877 
878 	case AUDITPIPE_GET_TRUNCATES:
879 		*(u_int *)data = ap->ap_truncates;
880 		error = 0;
881 		break;
882 
883 	default:
884 		error = ENOTTY;
885 	}
886 	return (error);
887 }
888 
889 /*
890  * Audit pipe read.  Pull one record off the queue and copy to user space.
891  * On error, the record is dropped.
892  *
893  * Providing more sophisticated behavior, such as partial reads, is tricky
894  * due to the potential for parallel I/O.  If partial read support is
895  * required, it will require a per-pipe "current record being read" along
896  * with an offset into that trecord which has already been read.  Threads
897  * performing partial reads will need to allocate per-thread copies of the
898  * data so that if another thread completes the read of the record, it can be
899  * freed without adding reference count logic.  If this is added, a flag to
900  * indicate that only atomic record reads are desired would be useful, as if
901  * different threads are all waiting for records on the pipe, they will want
902  * independent record reads, which is currently the behavior.
903  */
904 static int
905 audit_pipe_read(struct cdev *dev, struct uio *uio, int flag)
906 {
907 	struct audit_pipe_entry *ape;
908 	struct audit_pipe *ap;
909 	int error;
910 
911 	ap = dev->si_drv1;
912 	KASSERT(ap != NULL, ("audit_pipe_read: ap == NULL"));
913 	mtx_lock(&audit_pipe_mtx);
914 	do {
915 		/*
916 		 * Wait for a record that fits into the read buffer, dropping
917 		 * records that would be truncated if actually passed to the
918 		 * process.  This helps maintain the discreet record read
919 		 * interface.
920 		 */
921 		while ((ape = audit_pipe_pop(ap)) == NULL) {
922 			if (ap->ap_flags & AUDIT_PIPE_NBIO) {
923 				mtx_unlock(&audit_pipe_mtx);
924 				return (EAGAIN);
925 			}
926 			error = cv_wait_sig(&audit_pipe_cv, &audit_pipe_mtx);
927 			if (error) {
928 				mtx_unlock(&audit_pipe_mtx);
929 				return (error);
930 			}
931 		}
932 		if (ape->ape_record_len <= uio->uio_resid)
933 			break;
934 		audit_pipe_entry_free(ape);
935 		ap->ap_truncates++;
936 	} while (1);
937 	mtx_unlock(&audit_pipe_mtx);
938 
939 	/*
940 	 * Now read record to user space memory.  Even if the read is short,
941 	 * we abandon the remainder of the record, supporting only discreet
942 	 * record reads.
943 	 */
944 	error = uiomove(ape->ape_record, ape->ape_record_len, uio);
945 	audit_pipe_entry_free(ape);
946 	return (error);
947 }
948 
949 /*
950  * Audit pipe poll.
951  */
952 static int
953 audit_pipe_poll(struct cdev *dev, int events, struct thread *td)
954 {
955 	struct audit_pipe *ap;
956 	int revents;
957 
958 	revents = 0;
959 	ap = dev->si_drv1;
960 	KASSERT(ap != NULL, ("audit_pipe_poll: ap == NULL"));
961 	if (events & (POLLIN | POLLRDNORM)) {
962 		mtx_lock(&audit_pipe_mtx);
963 		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
964 			revents |= events & (POLLIN | POLLRDNORM);
965 		else
966 			selrecord(td, &ap->ap_selinfo);
967 		mtx_unlock(&audit_pipe_mtx);
968 	}
969 	return (revents);
970 }
971 
972 /*
973  * Audit pipe kqfilter.
974  */
975 static int
976 audit_pipe_kqfilter(struct cdev *dev, struct knote *kn)
977 {
978 	struct audit_pipe *ap;
979 
980 	ap = dev->si_drv1;
981 	KASSERT(ap != NULL, ("audit_pipe_kqfilter: ap == NULL"));
982 
983 	if (kn->kn_filter != EVFILT_READ)
984 		return (EINVAL);
985 
986 	kn->kn_fop = &audit_pipe_read_filterops;
987 	kn->kn_hook = ap;
988 
989 	mtx_lock(&audit_pipe_mtx);
990 	knlist_add(&ap->ap_selinfo.si_note, kn, 1);
991 	mtx_unlock(&audit_pipe_mtx);
992 	return (0);
993 }
994 
995 /*
996  * Return true if there are records available for reading on the pipe.
997  */
998 static int
999 audit_pipe_kqread(struct knote *kn, long hint)
1000 {
1001 	struct audit_pipe_entry *ape;
1002 	struct audit_pipe *ap;
1003 
1004 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
1005 
1006 	ap = (struct audit_pipe *)kn->kn_hook;
1007 	KASSERT(ap != NULL, ("audit_pipe_kqread: ap == NULL"));
1008 
1009 	if (ap->ap_qlen != 0) {
1010 		ape = TAILQ_FIRST(&ap->ap_queue);
1011 		KASSERT(ape != NULL, ("audit_pipe_kqread: ape == NULL"));
1012 
1013 		kn->kn_data = ape->ape_record_len;
1014 		return (1);
1015 	} else {
1016 		kn->kn_data = 0;
1017 		return (0);
1018 	}
1019 }
1020 
1021 /*
1022  * Detach kqueue state from audit pipe.
1023  */
1024 static void
1025 audit_pipe_kqdetach(struct knote *kn)
1026 {
1027 	struct audit_pipe *ap;
1028 
1029 	ap = (struct audit_pipe *)kn->kn_hook;
1030 	KASSERT(ap != NULL, ("audit_pipe_kqdetach: ap == NULL"));
1031 
1032 	mtx_lock(&audit_pipe_mtx);
1033 	knlist_remove(&ap->ap_selinfo.si_note, kn, 1);
1034 	mtx_unlock(&audit_pipe_mtx);
1035 }
1036 
1037 /*
1038  * Initialize the audit pipe system.
1039  */
1040 static void
1041 audit_pipe_init(void *unused)
1042 {
1043 
1044 	TAILQ_INIT(&audit_pipe_list);
1045 	mtx_init(&audit_pipe_mtx, "audit_pipe_mtx", NULL, MTX_DEF);
1046 	cv_init(&audit_pipe_cv, "audit_pipe_cv");
1047 
1048 	clone_setup(&audit_pipe_clones);
1049 	audit_pipe_eh_tag = EVENTHANDLER_REGISTER(dev_clone,
1050 	    audit_pipe_clone, 0, 1000);
1051 	if (audit_pipe_eh_tag == NULL)
1052 		panic("audit_pipe_init: EVENTHANDLER_REGISTER");
1053 }
1054 
1055 SYSINIT(audit_pipe_init, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, audit_pipe_init,
1056     NULL);
1057