xref: /linux/kernel/auditsc.c (revision 01116105)
1 /* auditsc.c -- System-call auditing support
2  * Handles all system-call specific auditing features.
3  *
4  * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
5  * All Rights Reserved.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  * Written by Rickard E. (Rik) Faith <faith@redhat.com>
22  *
23  * Many of the ideas implemented here are from Stephen C. Tweedie,
24  * especially the idea of avoiding a copy by using getname.
25  *
26  * The method for actual interception of syscall entry and exit (not in
27  * this file -- see entry.S) is based on a GPL'd patch written by
28  * okir@suse.de and Copyright 2003 SuSE Linux AG.
29  *
30  */
31 
32 #include <linux/init.h>
33 #include <asm/atomic.h>
34 #include <asm/types.h>
35 #include <linux/mm.h>
36 #include <linux/module.h>
37 #include <linux/mount.h>
38 #include <linux/socket.h>
39 #include <linux/audit.h>
40 #include <linux/personality.h>
41 #include <linux/time.h>
42 #include <asm/unistd.h>
43 
/* Compile-time debug level for the getname/putname bookkeeping below:
 *   0 = no checking
 *   1 = put_count checking
 *   2 = verbose put_count checking
 */
#define AUDIT_DEBUG 0

/* No syscall auditing will take place unless audit_enabled != 0.
 * Only declared here; the definition is not in this file. */
extern int audit_enabled;

/* AUDIT_NAMES is the number of slots we reserve in the audit_context
 * for saving names from getname(). */
#define AUDIT_NAMES    20

/* AUDIT_NAMES_RESERVED is the number of slots we reserve in the
 * audit_context from being used for nameless inodes from
 * path_lookup.  audit_inode() stops adding nameless entries once
 * name_count reaches AUDIT_NAMES - AUDIT_NAMES_RESERVED. */
#define AUDIT_NAMES_RESERVED 7
61 
/* At task start time, the audit_state is set in the audit_context using
   a per-task filter.  At syscall entry, the audit_state is augmented by
   the syscall filter.  The states are listed from least to most
   auditing work. */
enum audit_state {
	AUDIT_DISABLED,		/* Do not create per-task audit_context.
				 * No syscall-specific audit records can
				 * be generated. */
	AUDIT_SETUP_CONTEXT,	/* Create the per-task audit_context,
				 * but don't necessarily fill it in at
				 * syscall entry time (i.e., filter
				 * instead). */
	AUDIT_BUILD_CONTEXT,	/* Create the per-task audit_context,
				 * and always fill it in at syscall
				 * entry time.  This makes a full
				 * syscall record available if some
				 * other part of the kernel decides it
				 * should be recorded. */
	AUDIT_RECORD_CONTEXT	/* Create the per-task audit_context,
				 * always fill it in at syscall entry
				 * time, and always write out the audit
				 * record at syscall exit time.  */
};
84 
/* When fs/namei.c:getname() is called, we store the pointer in name and
 * we don't let putname() free it (instead we free all of the saved
 * pointers at syscall exit time).
 *
 * Further, in fs/namei.c:path_lookup() we store the inode and device.
 *
 * audit_getname() fills in name and marks ino as (unsigned long)-1
 * ("no inode recorded yet"); audit_inode() fills in the remaining
 * fields from the inode, and uses a NULL name for entries that have no
 * associated getname() string. */
struct audit_names {
	const char	*name;	/* string from getname(), or NULL */
	unsigned long	ino;	/* inode number, or -1 if not recorded */
	dev_t		dev;	/* device of the inode's superblock */
	umode_t		mode;	/* inode mode bits */
	uid_t		uid;	/* inode owner */
	gid_t		gid;	/* inode group */
	dev_t		rdev;	/* inode i_rdev */
};
99 
/* Common header of every auxiliary record hung off audit_context.aux.
 * The records form a singly linked list; the concrete structures below
 * embed this header as their first member so that a pointer to the
 * header can be cast to the concrete type selected by 'type'. */
struct audit_aux_data {
	struct audit_aux_data	*next;	/* next record, or NULL */
	int			type;	/* record type, also used as the
					 * audit message type when logged */
};
104 
/* NOTE(review): not referenced in the part of the file visible here. */
#define AUDIT_AUX_IPCPERM	0

/* Auxiliary record logged for AUDIT_IPC: audit_log_exit() prints
 * qbytes, uid, gid and mode.  The 'p' member is not read by the code
 * visible here. */
struct audit_aux_data_ipcctl {
	struct audit_aux_data	d;	/* must be first (list header) */
	struct ipc_perm		p;
	unsigned long		qbytes;
	uid_t			uid;
	gid_t			gid;
	mode_t			mode;
};
115 
/* Auxiliary record logged for AUDIT_SOCKETCALL: audit_log_exit() prints
 * nargs followed by args[0..nargs-1].  args is a trailing
 * variable-length array, so the record must be allocated with room for
 * nargs entries. */
struct audit_aux_data_socketcall {
	struct audit_aux_data	d;	/* must be first (list header) */
	int			nargs;	/* number of valid entries in args */
	unsigned long		args[0];
};
121 
/* Auxiliary record logged for AUDIT_SOCKADDR: audit_log_exit() dumps
 * the first len bytes of a[] in hex.  a is a trailing variable-length
 * array, so the record must be allocated with room for len bytes. */
struct audit_aux_data_sockaddr {
	struct audit_aux_data	d;	/* must be first (list header) */
	int			len;	/* number of valid bytes in a */
	char			a[0];
};
127 
/* Auxiliary record logged for AUDIT_AVC_PATH.  The record owns a
 * reference on both dentry and mnt: whoever frees the record
 * (audit_log_exit() or audit_free_aux()) drops them with dput() and
 * mntput(). */
struct audit_aux_data_path {
	struct audit_aux_data	d;	/* must be first (list header) */
	struct dentry		*dentry;
	struct vfsmount		*mnt;
};
133 
/* The per-task audit context.  It is reset with audit_zero_context()
 * (which preserves only state and loginuid) and filled in at syscall
 * entry; the task credentials at the bottom are snapshotted by
 * audit_get_context() so they can be printed at syscall exit. */
struct audit_context {
	int		    in_syscall;	/* 1 if task is in a syscall */
	enum audit_state    state;
	unsigned int	    serial;     /* serial number for record */
	struct timespec	    ctime;      /* time of syscall entry */
	uid_t		    loginuid;   /* login uid (identity) */
	int		    major;      /* syscall number */
	unsigned long	    argv[4];    /* syscall arguments */
	int		    return_valid; /* return code is valid */
	long		    return_code;/* syscall return code */
	int		    auditable;  /* 1 if record should be written */
	int		    name_count;	/* number of used entries in names[] */
	struct audit_names  names[AUDIT_NAMES];
	struct audit_context *previous; /* For nested syscalls */
	struct audit_aux_data *aux;	/* list of auxiliary records */

				/* Save things to print about task_struct */
	pid_t		    pid;
	uid_t		    uid, euid, suid, fsuid;
	gid_t		    gid, egid, sgid, fsgid;
	unsigned long	    personality;
	int		    arch;

#if AUDIT_DEBUG
	int		    put_count;	/* putname() calls seen in this syscall */
	int		    ino_count;	/* nameless entries added by audit_inode() */
#endif
};
163 
164 				/* Public API */
/* There are three lists of rules -- one to search at task creation
 * time, one to search at syscall entry time, and another to search at
 * syscall exit time.  Readers walk them under rcu_read_lock(); they are
 * modified only through audit_add_rule() and audit_del_rule(). */
static LIST_HEAD(audit_tsklist);	/* searched by audit_filter_task() */
static LIST_HEAD(audit_entlist);	/* searched at syscall entry */
static LIST_HEAD(audit_extlist);	/* searched at syscall exit */
171 
/* One filter rule as stored on a rule list.  An entry has a single
 * list_head and can therefore be linked into only one list at a time. */
struct audit_entry {
	struct list_head  list;	/* linkage in one of the rule lists */
	struct rcu_head   rcu;	/* for deferred freeing via call_rcu() */
	struct audit_rule rule;	/* the rule itself */
};
177 
/* Defined outside this file.  Syscall records are not written for the
 * task whose pid equals audit_pid (see audit_free() and
 * audit_syscall_exit()); presumably this is the audit daemon -- TODO
 * confirm against audit.c. */
extern int audit_pid;
179 
180 /* Check to see if two rules are identical.  It is called from
181  * audit_del_rule during AUDIT_DEL. */
182 static int audit_compare_rule(struct audit_rule *a, struct audit_rule *b)
183 {
184 	int i;
185 
186 	if (a->flags != b->flags)
187 		return 1;
188 
189 	if (a->action != b->action)
190 		return 1;
191 
192 	if (a->field_count != b->field_count)
193 		return 1;
194 
195 	for (i = 0; i < a->field_count; i++) {
196 		if (a->fields[i] != b->fields[i]
197 		    || a->values[i] != b->values[i])
198 			return 1;
199 	}
200 
201 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
202 		if (a->mask[i] != b->mask[i])
203 			return 1;
204 
205 	return 0;
206 }
207 
208 /* Note that audit_add_rule and audit_del_rule are called via
209  * audit_receive() in audit.c, and are protected by
210  * audit_netlink_sem. */
211 static inline int audit_add_rule(struct audit_entry *entry,
212 				 struct list_head *list)
213 {
214 	if (entry->rule.flags & AUDIT_PREPEND) {
215 		entry->rule.flags &= ~AUDIT_PREPEND;
216 		list_add_rcu(&entry->list, list);
217 	} else {
218 		list_add_tail_rcu(&entry->list, list);
219 	}
220 	return 0;
221 }
222 
223 static void audit_free_rule(struct rcu_head *head)
224 {
225 	struct audit_entry *e = container_of(head, struct audit_entry, rcu);
226 	kfree(e);
227 }
228 
229 /* Note that audit_add_rule and audit_del_rule are called via
230  * audit_receive() in audit.c, and are protected by
231  * audit_netlink_sem. */
232 static inline int audit_del_rule(struct audit_rule *rule,
233 				 struct list_head *list)
234 {
235 	struct audit_entry  *e;
236 
237 	/* Do not use the _rcu iterator here, since this is the only
238 	 * deletion routine. */
239 	list_for_each_entry(e, list, list) {
240 		if (!audit_compare_rule(rule, &e->rule)) {
241 			list_del_rcu(&e->list);
242 			call_rcu(&e->rcu, audit_free_rule);
243 			return 0;
244 		}
245 	}
246 	return -EFAULT;		/* No matching rule */
247 }
248 
249 /* Copy rule from user-space to kernel-space.  Called during
250  * AUDIT_ADD. */
251 static int audit_copy_rule(struct audit_rule *d, struct audit_rule *s)
252 {
253 	int i;
254 
255 	if (s->action != AUDIT_NEVER
256 	    && s->action != AUDIT_POSSIBLE
257 	    && s->action != AUDIT_ALWAYS)
258 		return -1;
259 	if (s->field_count < 0 || s->field_count > AUDIT_MAX_FIELDS)
260 		return -1;
261 
262 	d->flags	= s->flags;
263 	d->action	= s->action;
264 	d->field_count	= s->field_count;
265 	for (i = 0; i < d->field_count; i++) {
266 		d->fields[i] = s->fields[i];
267 		d->values[i] = s->values[i];
268 	}
269 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++) d->mask[i] = s->mask[i];
270 	return 0;
271 }
272 
273 int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
274 							uid_t loginuid)
275 {
276 	u32		   flags;
277 	struct audit_entry *entry;
278 	int		   err = 0;
279 
280 	switch (type) {
281 	case AUDIT_LIST:
282 		/* The *_rcu iterators not needed here because we are
283 		   always called with audit_netlink_sem held. */
284 		list_for_each_entry(entry, &audit_tsklist, list)
285 			audit_send_reply(pid, seq, AUDIT_LIST, 0, 1,
286 					 &entry->rule, sizeof(entry->rule));
287 		list_for_each_entry(entry, &audit_entlist, list)
288 			audit_send_reply(pid, seq, AUDIT_LIST, 0, 1,
289 					 &entry->rule, sizeof(entry->rule));
290 		list_for_each_entry(entry, &audit_extlist, list)
291 			audit_send_reply(pid, seq, AUDIT_LIST, 0, 1,
292 					 &entry->rule, sizeof(entry->rule));
293 		audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0);
294 		break;
295 	case AUDIT_ADD:
296 		if (!(entry = kmalloc(sizeof(*entry), GFP_KERNEL)))
297 			return -ENOMEM;
298 		if (audit_copy_rule(&entry->rule, data)) {
299 			kfree(entry);
300 			return -EINVAL;
301 		}
302 		flags = entry->rule.flags;
303 		if (!err && (flags & AUDIT_PER_TASK))
304 			err = audit_add_rule(entry, &audit_tsklist);
305 		if (!err && (flags & AUDIT_AT_ENTRY))
306 			err = audit_add_rule(entry, &audit_entlist);
307 		if (!err && (flags & AUDIT_AT_EXIT))
308 			err = audit_add_rule(entry, &audit_extlist);
309 		audit_log(NULL, AUDIT_CONFIG_CHANGE,
310 				"auid %u added an audit rule\n", loginuid);
311 		break;
312 	case AUDIT_DEL:
313 		flags =((struct audit_rule *)data)->flags;
314 		if (!err && (flags & AUDIT_PER_TASK))
315 			err = audit_del_rule(data, &audit_tsklist);
316 		if (!err && (flags & AUDIT_AT_ENTRY))
317 			err = audit_del_rule(data, &audit_entlist);
318 		if (!err && (flags & AUDIT_AT_EXIT))
319 			err = audit_del_rule(data, &audit_extlist);
320 		audit_log(NULL, AUDIT_CONFIG_CHANGE,
321 				"auid %u removed an audit rule\n", loginuid);
322 		break;
323 	default:
324 		return -EINVAL;
325 	}
326 
327 	return err;
328 }
329 
330 /* Compare a task_struct with an audit_rule.  Return 1 on match, 0
331  * otherwise. */
332 static int audit_filter_rules(struct task_struct *tsk,
333 			      struct audit_rule *rule,
334 			      struct audit_context *ctx,
335 			      enum audit_state *state)
336 {
337 	int i, j;
338 
339 	for (i = 0; i < rule->field_count; i++) {
340 		u32 field  = rule->fields[i] & ~AUDIT_NEGATE;
341 		u32 value  = rule->values[i];
342 		int result = 0;
343 
344 		switch (field) {
345 		case AUDIT_PID:
346 			result = (tsk->pid == value);
347 			break;
348 		case AUDIT_UID:
349 			result = (tsk->uid == value);
350 			break;
351 		case AUDIT_EUID:
352 			result = (tsk->euid == value);
353 			break;
354 		case AUDIT_SUID:
355 			result = (tsk->suid == value);
356 			break;
357 		case AUDIT_FSUID:
358 			result = (tsk->fsuid == value);
359 			break;
360 		case AUDIT_GID:
361 			result = (tsk->gid == value);
362 			break;
363 		case AUDIT_EGID:
364 			result = (tsk->egid == value);
365 			break;
366 		case AUDIT_SGID:
367 			result = (tsk->sgid == value);
368 			break;
369 		case AUDIT_FSGID:
370 			result = (tsk->fsgid == value);
371 			break;
372 		case AUDIT_PERS:
373 			result = (tsk->personality == value);
374 			break;
375 		case AUDIT_ARCH:
376 			if (ctx)
377 				result = (ctx->arch == value);
378 			break;
379 
380 		case AUDIT_EXIT:
381 			if (ctx && ctx->return_valid)
382 				result = (ctx->return_code == value);
383 			break;
384 		case AUDIT_SUCCESS:
385 			if (ctx && ctx->return_valid)
386 				result = (ctx->return_valid == AUDITSC_SUCCESS);
387 			break;
388 		case AUDIT_DEVMAJOR:
389 			if (ctx) {
390 				for (j = 0; j < ctx->name_count; j++) {
391 					if (MAJOR(ctx->names[j].dev)==value) {
392 						++result;
393 						break;
394 					}
395 				}
396 			}
397 			break;
398 		case AUDIT_DEVMINOR:
399 			if (ctx) {
400 				for (j = 0; j < ctx->name_count; j++) {
401 					if (MINOR(ctx->names[j].dev)==value) {
402 						++result;
403 						break;
404 					}
405 				}
406 			}
407 			break;
408 		case AUDIT_INODE:
409 			if (ctx) {
410 				for (j = 0; j < ctx->name_count; j++) {
411 					if (ctx->names[j].ino == value) {
412 						++result;
413 						break;
414 					}
415 				}
416 			}
417 			break;
418 		case AUDIT_LOGINUID:
419 			result = 0;
420 			if (ctx)
421 				result = (ctx->loginuid == value);
422 			break;
423 		case AUDIT_ARG0:
424 		case AUDIT_ARG1:
425 		case AUDIT_ARG2:
426 		case AUDIT_ARG3:
427 			if (ctx)
428 				result = (ctx->argv[field-AUDIT_ARG0]==value);
429 			break;
430 		}
431 
432 		if (rule->fields[i] & AUDIT_NEGATE)
433 			result = !result;
434 		if (!result)
435 			return 0;
436 	}
437 	switch (rule->action) {
438 	case AUDIT_NEVER:    *state = AUDIT_DISABLED;	    break;
439 	case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT;  break;
440 	case AUDIT_ALWAYS:   *state = AUDIT_RECORD_CONTEXT; break;
441 	}
442 	return 1;
443 }
444 
445 /* At process creation time, we can determine if system-call auditing is
446  * completely disabled for this task.  Since we only have the task
447  * structure at this point, we can only check uid and gid.
448  */
449 static enum audit_state audit_filter_task(struct task_struct *tsk)
450 {
451 	struct audit_entry *e;
452 	enum audit_state   state;
453 
454 	rcu_read_lock();
455 	list_for_each_entry_rcu(e, &audit_tsklist, list) {
456 		if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
457 			rcu_read_unlock();
458 			return state;
459 		}
460 	}
461 	rcu_read_unlock();
462 	return AUDIT_BUILD_CONTEXT;
463 }
464 
465 /* At syscall entry and exit time, this filter is called if the
466  * audit_state is not low enough that auditing cannot take place, but is
467  * also not high enough that we already know we have to write an audit
468  * record (i.e., the state is AUDIT_SETUP_CONTEXT or  AUDIT_BUILD_CONTEXT).
469  */
470 static enum audit_state audit_filter_syscall(struct task_struct *tsk,
471 					     struct audit_context *ctx,
472 					     struct list_head *list)
473 {
474 	struct audit_entry *e;
475 	enum audit_state   state;
476 	int		   word = AUDIT_WORD(ctx->major);
477 	int		   bit  = AUDIT_BIT(ctx->major);
478 
479 	rcu_read_lock();
480 	list_for_each_entry_rcu(e, list, list) {
481 		if ((e->rule.mask[word] & bit) == bit
482  		    && audit_filter_rules(tsk, &e->rule, ctx, &state)) {
483 			rcu_read_unlock();
484 			return state;
485 		}
486 	}
487 	rcu_read_unlock();
488 	return AUDIT_BUILD_CONTEXT;
489 }
490 
491 /* This should be called with task_lock() held. */
492 static inline struct audit_context *audit_get_context(struct task_struct *tsk,
493 						      int return_valid,
494 						      int return_code)
495 {
496 	struct audit_context *context = tsk->audit_context;
497 
498 	if (likely(!context))
499 		return NULL;
500 	context->return_valid = return_valid;
501 	context->return_code  = return_code;
502 
503 	if (context->in_syscall && !context->auditable) {
504 		enum audit_state state;
505 		state = audit_filter_syscall(tsk, context, &audit_extlist);
506 		if (state == AUDIT_RECORD_CONTEXT)
507 			context->auditable = 1;
508 	}
509 
510 	context->pid = tsk->pid;
511 	context->uid = tsk->uid;
512 	context->gid = tsk->gid;
513 	context->euid = tsk->euid;
514 	context->suid = tsk->suid;
515 	context->fsuid = tsk->fsuid;
516 	context->egid = tsk->egid;
517 	context->sgid = tsk->sgid;
518 	context->fsgid = tsk->fsgid;
519 	context->personality = tsk->personality;
520 	tsk->audit_context = NULL;
521 	return context;
522 }
523 
524 static inline void audit_free_names(struct audit_context *context)
525 {
526 	int i;
527 
528 #if AUDIT_DEBUG == 2
529 	if (context->auditable
530 	    ||context->put_count + context->ino_count != context->name_count) {
531 		printk(KERN_ERR "audit.c:%d(:%d): major=%d in_syscall=%d"
532 		       " name_count=%d put_count=%d"
533 		       " ino_count=%d [NOT freeing]\n",
534 		       __LINE__,
535 		       context->serial, context->major, context->in_syscall,
536 		       context->name_count, context->put_count,
537 		       context->ino_count);
538 		for (i = 0; i < context->name_count; i++)
539 			printk(KERN_ERR "names[%d] = %p = %s\n", i,
540 			       context->names[i].name,
541 			       context->names[i].name);
542 		dump_stack();
543 		return;
544 	}
545 #endif
546 #if AUDIT_DEBUG
547 	context->put_count  = 0;
548 	context->ino_count  = 0;
549 #endif
550 
551 	for (i = 0; i < context->name_count; i++)
552 		if (context->names[i].name)
553 			__putname(context->names[i].name);
554 	context->name_count = 0;
555 }
556 
557 static inline void audit_free_aux(struct audit_context *context)
558 {
559 	struct audit_aux_data *aux;
560 
561 	while ((aux = context->aux)) {
562 		if (aux->type == AUDIT_AVC_PATH) {
563 			struct audit_aux_data_path *axi = (void *)aux;
564 			dput(axi->dentry);
565 			mntput(axi->mnt);
566 		}
567 		context->aux = aux->next;
568 		kfree(aux);
569 	}
570 }
571 
572 static inline void audit_zero_context(struct audit_context *context,
573 				      enum audit_state state)
574 {
575 	uid_t loginuid = context->loginuid;
576 
577 	memset(context, 0, sizeof(*context));
578 	context->state      = state;
579 	context->loginuid   = loginuid;
580 }
581 
582 static inline struct audit_context *audit_alloc_context(enum audit_state state)
583 {
584 	struct audit_context *context;
585 
586 	if (!(context = kmalloc(sizeof(*context), GFP_KERNEL)))
587 		return NULL;
588 	audit_zero_context(context, state);
589 	return context;
590 }
591 
592 /* Filter on the task information and allocate a per-task audit context
593  * if necessary.  Doing so turns on system call auditing for the
594  * specified task.  This is called from copy_process, so no lock is
595  * needed. */
596 int audit_alloc(struct task_struct *tsk)
597 {
598 	struct audit_context *context;
599 	enum audit_state     state;
600 
601 	if (likely(!audit_enabled))
602 		return 0; /* Return if not auditing. */
603 
604 	state = audit_filter_task(tsk);
605 	if (likely(state == AUDIT_DISABLED))
606 		return 0;
607 
608 	if (!(context = audit_alloc_context(state))) {
609 		audit_log_lost("out of memory in audit_alloc");
610 		return -ENOMEM;
611 	}
612 
613 				/* Preserve login uid */
614 	context->loginuid = -1;
615 	if (current->audit_context)
616 		context->loginuid = current->audit_context->loginuid;
617 
618 	tsk->audit_context  = context;
619 	set_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
620 	return 0;
621 }
622 
623 static inline void audit_free_context(struct audit_context *context)
624 {
625 	struct audit_context *previous;
626 	int		     count = 0;
627 
628 	do {
629 		previous = context->previous;
630 		if (previous || (count &&  count < 10)) {
631 			++count;
632 			printk(KERN_ERR "audit(:%d): major=%d name_count=%d:"
633 			       " freeing multiple contexts (%d)\n",
634 			       context->serial, context->major,
635 			       context->name_count, count);
636 		}
637 		audit_free_names(context);
638 		audit_free_aux(context);
639 		kfree(context);
640 		context  = previous;
641 	} while (context);
642 	if (count >= 10)
643 		printk(KERN_ERR "audit: freed %d contexts\n", count);
644 }
645 
646 static void audit_log_task_info(struct audit_buffer *ab)
647 {
648 	char name[sizeof(current->comm)];
649 	struct mm_struct *mm = current->mm;
650 	struct vm_area_struct *vma;
651 
652 	get_task_comm(name, current);
653 	audit_log_format(ab, " comm=%s", name);
654 
655 	if (!mm)
656 		return;
657 
658 	down_read(&mm->mmap_sem);
659 	vma = mm->mmap;
660 	while (vma) {
661 		if ((vma->vm_flags & VM_EXECUTABLE) &&
662 		    vma->vm_file) {
663 			audit_log_d_path(ab, "exe=",
664 					 vma->vm_file->f_dentry,
665 					 vma->vm_file->f_vfsmnt);
666 			break;
667 		}
668 		vma = vma->vm_next;
669 	}
670 	up_read(&mm->mmap_sem);
671 }
672 
673 static void audit_log_exit(struct audit_context *context)
674 {
675 	int i;
676 	struct audit_buffer *ab;
677 
678 	ab = audit_log_start(context, AUDIT_SYSCALL);
679 	if (!ab)
680 		return;		/* audit_panic has been called */
681 	audit_log_format(ab, "syscall=%d", context->major);
682 	if (context->personality != PER_LINUX)
683 		audit_log_format(ab, " per=%lx", context->personality);
684 	audit_log_format(ab, " arch=%x", context->arch);
685 	if (context->return_valid)
686 		audit_log_format(ab, " success=%s exit=%ld",
687 				 (context->return_valid==AUDITSC_SUCCESS)?"yes":"no",
688 				 context->return_code);
689 	audit_log_format(ab,
690 		  " a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
691 		  " pid=%d loginuid=%d uid=%d gid=%d"
692 		  " euid=%d suid=%d fsuid=%d"
693 		  " egid=%d sgid=%d fsgid=%d",
694 		  context->argv[0],
695 		  context->argv[1],
696 		  context->argv[2],
697 		  context->argv[3],
698 		  context->name_count,
699 		  context->pid,
700 		  context->loginuid,
701 		  context->uid,
702 		  context->gid,
703 		  context->euid, context->suid, context->fsuid,
704 		  context->egid, context->sgid, context->fsgid);
705 	audit_log_task_info(ab);
706 	audit_log_end(ab);
707 	while (context->aux) {
708 		struct audit_aux_data *aux;
709 
710 		aux = context->aux;
711 
712 		ab = audit_log_start(context, aux->type);
713 		if (!ab)
714 			continue; /* audit_panic has been called */
715 
716 		switch (aux->type) {
717 		case AUDIT_IPC: {
718 			struct audit_aux_data_ipcctl *axi = (void *)aux;
719 			audit_log_format(ab,
720 					 " qbytes=%lx iuid=%d igid=%d mode=%x",
721 					 axi->qbytes, axi->uid, axi->gid, axi->mode);
722 			break; }
723 
724 		case AUDIT_SOCKETCALL: {
725 			int i;
726 			struct audit_aux_data_socketcall *axs = (void *)aux;
727 			audit_log_format(ab, "nargs=%d", axs->nargs);
728 			for (i=0; i<axs->nargs; i++)
729 				audit_log_format(ab, " a%d=%lx", i, axs->args[i]);
730 			break; }
731 
732 		case AUDIT_SOCKADDR: {
733 			struct audit_aux_data_sockaddr *axs = (void *)aux;
734 
735 			audit_log_format(ab, "saddr=");
736 			audit_log_hex(ab, axs->a, axs->len);
737 			break; }
738 
739 		case AUDIT_AVC_PATH: {
740 			struct audit_aux_data_path *axi = (void *)aux;
741 			audit_log_d_path(ab, "path=", axi->dentry, axi->mnt);
742 			dput(axi->dentry);
743 			mntput(axi->mnt);
744 			break; }
745 
746 		}
747 		audit_log_end(ab);
748 
749 		context->aux = aux->next;
750 		kfree(aux);
751 	}
752 
753 	for (i = 0; i < context->name_count; i++) {
754 		ab = audit_log_start(context, AUDIT_PATH);
755 		if (!ab)
756 			continue; /* audit_panic has been called */
757 		audit_log_format(ab, "item=%d", i);
758 		if (context->names[i].name) {
759 			audit_log_format(ab, " name=");
760 			audit_log_untrustedstring(ab, context->names[i].name);
761 		}
762 		if (context->names[i].ino != (unsigned long)-1)
763 			audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#o"
764 					     " ouid=%d ogid=%d rdev=%02x:%02x",
765 					 context->names[i].ino,
766 					 MAJOR(context->names[i].dev),
767 					 MINOR(context->names[i].dev),
768 					 context->names[i].mode,
769 					 context->names[i].uid,
770 					 context->names[i].gid,
771 					 MAJOR(context->names[i].rdev),
772 					 MINOR(context->names[i].rdev));
773 		audit_log_end(ab);
774 	}
775 }
776 
777 /* Free a per-task audit context.  Called from copy_process and
778  * __put_task_struct. */
779 void audit_free(struct task_struct *tsk)
780 {
781 	struct audit_context *context;
782 
783 	task_lock(tsk);
784 	context = audit_get_context(tsk, 0, 0);
785 	task_unlock(tsk);
786 
787 	if (likely(!context))
788 		return;
789 
790 	/* Check for system calls that do not go through the exit
791 	 * function (e.g., exit_group), then free context block. */
792 	if (context->in_syscall && context->auditable && context->pid != audit_pid)
793 		audit_log_exit(context);
794 
795 	audit_free_context(context);
796 }
797 
/* Compute a serial number for the audit record.  Audit records are
 * written to user-space as soon as they are generated, so a complete
 * audit record may be written in several pieces.  The timestamp of the
 * record and this serial number are used by the user-space tools to
 * determine which pieces belong to the same audit record.  The
 * (timestamp,serial) tuple is unique for each syscall and is live from
 * syscall entry to syscall exit.
 *
 * Atomic values are only guaranteed to be 24-bit, so we count down.
 *
 * The counter starts at 0xffffff and is decremented on every call; the
 * value returned is 0xffffff minus the counter after the decrement, so
 * returned serials grow while the counter shrinks.
 *
 * NOTE: Another possibility is to store the formatted records off the
 * audit context (for those records that have a context), and emit them
 * all at syscall exit.  However, this could delay the reporting of
 * significant errors until syscall exit (or never, if the system
 * halts). */
static inline unsigned int audit_serial(void)
{
	static atomic_t serial = ATOMIC_INIT(0xffffff);
	unsigned int a, b;

	do {
		a = atomic_read(&serial);
		/* When the counter reaches zero, restart it at the top. */
		if (atomic_dec_and_test(&serial))
			atomic_set(&serial, 0xffffff);
		b = atomic_read(&serial);
		/* Retry unless the value read back is exactly one below
		 * the value read before the decrement; this is not the
		 * case after a restart, nor when the counter was changed
		 * in between the reads. */
	} while (b != a - 1);

	return 0xffffff - b;
}
827 
828 /* Fill in audit context at syscall entry.  This only happens if the
829  * audit context was created when the task was created and the state or
830  * filters demand the audit context be built.  If the state from the
831  * per-task filter or from the per-syscall filter is AUDIT_RECORD_CONTEXT,
832  * then the record will be written at syscall exit time (otherwise, it
833  * will only be written if another part of the kernel requests that it
834  * be written). */
835 void audit_syscall_entry(struct task_struct *tsk, int arch, int major,
836 			 unsigned long a1, unsigned long a2,
837 			 unsigned long a3, unsigned long a4)
838 {
839 	struct audit_context *context = tsk->audit_context;
840 	enum audit_state     state;
841 
842 	BUG_ON(!context);
843 
844 	/* This happens only on certain architectures that make system
845 	 * calls in kernel_thread via the entry.S interface, instead of
846 	 * with direct calls.  (If you are porting to a new
847 	 * architecture, hitting this condition can indicate that you
848 	 * got the _exit/_leave calls backward in entry.S.)
849 	 *
850 	 * i386     no
851 	 * x86_64   no
852 	 * ppc64    yes (see arch/ppc64/kernel/misc.S)
853 	 *
854 	 * This also happens with vm86 emulation in a non-nested manner
855 	 * (entries without exits), so this case must be caught.
856 	 */
857 	if (context->in_syscall) {
858 		struct audit_context *newctx;
859 
860 #if defined(__NR_vm86) && defined(__NR_vm86old)
861 		/* vm86 mode should only be entered once */
862 		if (major == __NR_vm86 || major == __NR_vm86old)
863 			return;
864 #endif
865 #if AUDIT_DEBUG
866 		printk(KERN_ERR
867 		       "audit(:%d) pid=%d in syscall=%d;"
868 		       " entering syscall=%d\n",
869 		       context->serial, tsk->pid, context->major, major);
870 #endif
871 		newctx = audit_alloc_context(context->state);
872 		if (newctx) {
873 			newctx->previous   = context;
874 			context		   = newctx;
875 			tsk->audit_context = newctx;
876 		} else	{
877 			/* If we can't alloc a new context, the best we
878 			 * can do is to leak memory (any pending putname
879 			 * will be lost).  The only other alternative is
880 			 * to abandon auditing. */
881 			audit_zero_context(context, context->state);
882 		}
883 	}
884 	BUG_ON(context->in_syscall || context->name_count);
885 
886 	if (!audit_enabled)
887 		return;
888 
889 	context->arch	    = arch;
890 	context->major      = major;
891 	context->argv[0]    = a1;
892 	context->argv[1]    = a2;
893 	context->argv[2]    = a3;
894 	context->argv[3]    = a4;
895 
896 	state = context->state;
897 	if (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT)
898 		state = audit_filter_syscall(tsk, context, &audit_entlist);
899 	if (likely(state == AUDIT_DISABLED))
900 		return;
901 
902 	context->serial     = audit_serial();
903 	context->ctime      = CURRENT_TIME;
904 	context->in_syscall = 1;
905 	context->auditable  = !!(state == AUDIT_RECORD_CONTEXT);
906 }
907 
908 /* Tear down after system call.  If the audit context has been marked as
909  * auditable (either because of the AUDIT_RECORD_CONTEXT state from
910  * filtering, or because some other part of the kernel write an audit
911  * message), then write out the syscall information.  In call cases,
912  * free the names stored from getname(). */
913 void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
914 {
915 	struct audit_context *context;
916 
917 	get_task_struct(tsk);
918 	task_lock(tsk);
919 	context = audit_get_context(tsk, valid, return_code);
920 	task_unlock(tsk);
921 
922 	/* Not having a context here is ok, since the parent may have
923 	 * called __put_task_struct. */
924 	if (likely(!context))
925 		return;
926 
927 	if (context->in_syscall && context->auditable && context->pid != audit_pid)
928 		audit_log_exit(context);
929 
930 	context->in_syscall = 0;
931 	context->auditable  = 0;
932 
933 	if (context->previous) {
934 		struct audit_context *new_context = context->previous;
935 		context->previous  = NULL;
936 		audit_free_context(context);
937 		tsk->audit_context = new_context;
938 	} else {
939 		audit_free_names(context);
940 		audit_free_aux(context);
941 		audit_zero_context(context, context->state);
942 		tsk->audit_context = context;
943 	}
944 	put_task_struct(tsk);
945 }
946 
947 /* Add a name to the list.  Called from fs/namei.c:getname(). */
948 void audit_getname(const char *name)
949 {
950 	struct audit_context *context = current->audit_context;
951 
952 	if (!context || IS_ERR(name) || !name)
953 		return;
954 
955 	if (!context->in_syscall) {
956 #if AUDIT_DEBUG == 2
957 		printk(KERN_ERR "%s:%d(:%d): ignoring getname(%p)\n",
958 		       __FILE__, __LINE__, context->serial, name);
959 		dump_stack();
960 #endif
961 		return;
962 	}
963 	BUG_ON(context->name_count >= AUDIT_NAMES);
964 	context->names[context->name_count].name = name;
965 	context->names[context->name_count].ino  = (unsigned long)-1;
966 	++context->name_count;
967 }
968 
969 /* Intercept a putname request.  Called from
970  * include/linux/fs.h:putname().  If we have stored the name from
971  * getname in the audit context, then we delay the putname until syscall
972  * exit. */
973 void audit_putname(const char *name)
974 {
975 	struct audit_context *context = current->audit_context;
976 
977 	BUG_ON(!context);
978 	if (!context->in_syscall) {
979 #if AUDIT_DEBUG == 2
980 		printk(KERN_ERR "%s:%d(:%d): __putname(%p)\n",
981 		       __FILE__, __LINE__, context->serial, name);
982 		if (context->name_count) {
983 			int i;
984 			for (i = 0; i < context->name_count; i++)
985 				printk(KERN_ERR "name[%d] = %p = %s\n", i,
986 				       context->names[i].name,
987 				       context->names[i].name);
988 		}
989 #endif
990 		__putname(name);
991 	}
992 #if AUDIT_DEBUG
993 	else {
994 		++context->put_count;
995 		if (context->put_count > context->name_count) {
996 			printk(KERN_ERR "%s:%d(:%d): major=%d"
997 			       " in_syscall=%d putname(%p) name_count=%d"
998 			       " put_count=%d\n",
999 			       __FILE__, __LINE__,
1000 			       context->serial, context->major,
1001 			       context->in_syscall, name, context->name_count,
1002 			       context->put_count);
1003 			dump_stack();
1004 		}
1005 	}
1006 #endif
1007 }
1008 
1009 /* Store the inode and device from a lookup.  Called from
1010  * fs/namei.c:path_lookup(). */
1011 void audit_inode(const char *name, const struct inode *inode)
1012 {
1013 	int idx;
1014 	struct audit_context *context = current->audit_context;
1015 
1016 	if (!context->in_syscall)
1017 		return;
1018 	if (context->name_count
1019 	    && context->names[context->name_count-1].name
1020 	    && context->names[context->name_count-1].name == name)
1021 		idx = context->name_count - 1;
1022 	else if (context->name_count > 1
1023 		 && context->names[context->name_count-2].name
1024 		 && context->names[context->name_count-2].name == name)
1025 		idx = context->name_count - 2;
1026 	else {
1027 		/* FIXME: how much do we care about inodes that have no
1028 		 * associated name? */
1029 		if (context->name_count >= AUDIT_NAMES - AUDIT_NAMES_RESERVED)
1030 			return;
1031 		idx = context->name_count++;
1032 		context->names[idx].name = NULL;
1033 #if AUDIT_DEBUG
1034 		++context->ino_count;
1035 #endif
1036 	}
1037 	context->names[idx].ino  = inode->i_ino;
1038 	context->names[idx].dev	 = inode->i_sb->s_dev;
1039 	context->names[idx].mode = inode->i_mode;
1040 	context->names[idx].uid  = inode->i_uid;
1041 	context->names[idx].gid  = inode->i_gid;
1042 	context->names[idx].rdev = inode->i_rdev;
1043 }
1044 
1045 int audit_get_stamp(struct audit_context *ctx,
1046 		     struct timespec *t, unsigned int *serial)
1047 {
1048 	if (ctx) {
1049 		t->tv_sec  = ctx->ctime.tv_sec;
1050 		t->tv_nsec = ctx->ctime.tv_nsec;
1051 		*serial    = ctx->serial;
1052 		ctx->auditable = 1;
1053 		return 1;
1054 	}
1055 	return 0;
1056 }
1057 
1058 int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
1059 {
1060 	if (task->audit_context) {
1061 		struct audit_buffer *ab;
1062 
1063 		ab = audit_log_start(NULL, AUDIT_LOGIN);
1064 		if (ab) {
1065 			audit_log_format(ab, "login pid=%d uid=%u "
1066 				"old loginuid=%u new loginuid=%u",
1067 				task->pid, task->uid,
1068 				task->audit_context->loginuid, loginuid);
1069 			audit_log_end(ab);
1070 		}
1071 		task->audit_context->loginuid = loginuid;
1072 	}
1073 	return 0;
1074 }
1075 
1076 uid_t audit_get_loginuid(struct audit_context *ctx)
1077 {
1078 	return ctx ? ctx->loginuid : -1;
1079 }
1080 
1081 int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
1082 {
1083 	struct audit_aux_data_ipcctl *ax;
1084 	struct audit_context *context = current->audit_context;
1085 
1086 	if (likely(!context))
1087 		return 0;
1088 
1089 	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
1090 	if (!ax)
1091 		return -ENOMEM;
1092 
1093 	ax->qbytes = qbytes;
1094 	ax->uid = uid;
1095 	ax->gid = gid;
1096 	ax->mode = mode;
1097 
1098 	ax->d.type = AUDIT_IPC;
1099 	ax->d.next = context->aux;
1100 	context->aux = (void *)ax;
1101 	return 0;
1102 }
1103 
1104 int audit_socketcall(int nargs, unsigned long *args)
1105 {
1106 	struct audit_aux_data_socketcall *ax;
1107 	struct audit_context *context = current->audit_context;
1108 
1109 	if (likely(!context))
1110 		return 0;
1111 
1112 	ax = kmalloc(sizeof(*ax) + nargs * sizeof(unsigned long), GFP_KERNEL);
1113 	if (!ax)
1114 		return -ENOMEM;
1115 
1116 	ax->nargs = nargs;
1117 	memcpy(ax->args, args, nargs * sizeof(unsigned long));
1118 
1119 	ax->d.type = AUDIT_SOCKETCALL;
1120 	ax->d.next = context->aux;
1121 	context->aux = (void *)ax;
1122 	return 0;
1123 }
1124 
1125 int audit_sockaddr(int len, void *a)
1126 {
1127 	struct audit_aux_data_sockaddr *ax;
1128 	struct audit_context *context = current->audit_context;
1129 
1130 	if (likely(!context))
1131 		return 0;
1132 
1133 	ax = kmalloc(sizeof(*ax) + len, GFP_KERNEL);
1134 	if (!ax)
1135 		return -ENOMEM;
1136 
1137 	ax->len = len;
1138 	memcpy(ax->a, a, len);
1139 
1140 	ax->d.type = AUDIT_SOCKADDR;
1141 	ax->d.next = context->aux;
1142 	context->aux = (void *)ax;
1143 	return 0;
1144 }
1145 
1146 int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt)
1147 {
1148 	struct audit_aux_data_path *ax;
1149 	struct audit_context *context = current->audit_context;
1150 
1151 	if (likely(!context))
1152 		return 0;
1153 
1154 	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
1155 	if (!ax)
1156 		return -ENOMEM;
1157 
1158 	ax->dentry = dget(dentry);
1159 	ax->mnt = mntget(mnt);
1160 
1161 	ax->d.type = AUDIT_AVC_PATH;
1162 	ax->d.next = context->aux;
1163 	context->aux = (void *)ax;
1164 	return 0;
1165 }
1166 
1167 void audit_signal_info(int sig, struct task_struct *t)
1168 {
1169 	extern pid_t audit_sig_pid;
1170 	extern uid_t audit_sig_uid;
1171 
1172 	if (unlikely(audit_pid && t->pid == audit_pid)) {
1173 		if (sig == SIGTERM || sig == SIGHUP) {
1174 			struct audit_context *ctx = current->audit_context;
1175 			audit_sig_pid = current->pid;
1176 			if (ctx)
1177 				audit_sig_uid = ctx->loginuid;
1178 			else
1179 				audit_sig_uid = current->uid;
1180 		}
1181 	}
1182 }
1183 
1184