1 /* auditsc.c -- System-call auditing support 2 * Handles all system-call specific auditing features. 3 * 4 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. 5 * All Rights Reserved. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 * Written by Rickard E. (Rik) Faith <faith@redhat.com> 22 * 23 * Many of the ideas implemented here are from Stephen C. Tweedie, 24 * especially the idea of avoiding a copy by using getname. 25 * 26 * The method for actual interception of syscall entry and exit (not in 27 * this file -- see entry.S) is based on a GPL'd patch written by 28 * okir@suse.de and Copyright 2003 SuSE Linux AG. 29 * 30 */ 31 32 #include <linux/init.h> 33 #include <asm/atomic.h> 34 #include <asm/types.h> 35 #include <linux/mm.h> 36 #include <linux/module.h> 37 38 #include <linux/audit.h> 39 #include <linux/personality.h> 40 #include <linux/time.h> 41 #include <asm/unistd.h> 42 43 /* 0 = no checking 44 1 = put_count checking 45 2 = verbose put_count checking 46 */ 47 #define AUDIT_DEBUG 0 48 49 /* No syscall auditing will take place unless audit_enabled != 0. */ 50 extern int audit_enabled; 51 52 /* AUDIT_NAMES is the number of slots we reserve in the audit_context 53 * for saving names from getname(). */ 54 #define AUDIT_NAMES 20 55 56 /* AUDIT_NAMES_RESERVED is the number of slots we reserve in the 57 * audit_context from being used for nameless inodes from 58 * path_lookup. */ 59 #define AUDIT_NAMES_RESERVED 7 60 61 /* At task start time, the audit_state is set in the audit_context using 62 a per-task filter. At syscall entry, the audit_state is augmented by 63 the syscall filter. */ 64 enum audit_state { 65 AUDIT_DISABLED, /* Do not create per-task audit_context. 66 * No syscall-specific audit records can 67 * be generated. */ 68 AUDIT_SETUP_CONTEXT, /* Create the per-task audit_context, 69 * but don't necessarily fill it in at 70 * syscall entry time (i.e., filter 71 * instead). */ 72 AUDIT_BUILD_CONTEXT, /* Create the per-task audit_context, 73 * and always fill it in at syscall 74 * entry time. This makes a full 75 * syscall record available if some 76 * other part of the kernel decides it 77 * should be recorded. */ 78 AUDIT_RECORD_CONTEXT /* Create the per-task audit_context, 79 * always fill it in at syscall entry 80 * time, and always write out the audit 81 * record at syscall exit time. */ 82 }; 83 84 /* When fs/namei.c:getname() is called, we store the pointer in name and 85 * we don't let putname() free it (instead we free all of the saved 86 * pointers at syscall exit time). 87 * 88 * Further, in fs/namei.c:path_lookup() we store the inode and device. */ 89 struct audit_names { 90 const char *name; 91 unsigned long ino; 92 dev_t dev; 93 umode_t mode; 94 uid_t uid; 95 gid_t gid; 96 dev_t rdev; 97 }; 98 99 struct audit_aux_data { 100 struct audit_aux_data *next; 101 int type; 102 }; 103 104 #define AUDIT_AUX_IPCPERM 0 105 106 struct audit_aux_data_ipcctl { 107 struct audit_aux_data d; 108 struct ipc_perm p; 109 unsigned long qbytes; 110 uid_t uid; 111 gid_t gid; 112 mode_t mode; 113 }; 114 115 116 /* The per-task audit context. */ 117 struct audit_context { 118 int in_syscall; /* 1 if task is in a syscall */ 119 enum audit_state state; 120 unsigned int serial; /* serial number for record */ 121 struct timespec ctime; /* time of syscall entry */ 122 uid_t loginuid; /* login uid (identity) */ 123 int major; /* syscall number */ 124 unsigned long argv[4]; /* syscall arguments */ 125 int return_valid; /* return code is valid */ 126 long return_code;/* syscall return code */ 127 int auditable; /* 1 if record should be written */ 128 int name_count; 129 struct audit_names names[AUDIT_NAMES]; 130 struct audit_context *previous; /* For nested syscalls */ 131 struct audit_aux_data *aux; 132 133 /* Save things to print about task_struct */ 134 pid_t pid; 135 uid_t uid, euid, suid, fsuid; 136 gid_t gid, egid, sgid, fsgid; 137 unsigned long personality; 138 int arch; 139 140 #if AUDIT_DEBUG 141 int put_count; 142 int ino_count; 143 #endif 144 }; 145 146 /* Public API */ 147 /* There are three lists of rules -- one to search at task creation 148 * time, one to search at syscall entry time, and another to search at 149 * syscall exit time. */ 150 static LIST_HEAD(audit_tsklist); 151 static LIST_HEAD(audit_entlist); 152 static LIST_HEAD(audit_extlist); 153 154 struct audit_entry { 155 struct list_head list; 156 struct rcu_head rcu; 157 struct audit_rule rule; 158 }; 159 160 /* Check to see if two rules are identical. It is called from 161 * audit_del_rule during AUDIT_DEL. */ 162 static int audit_compare_rule(struct audit_rule *a, struct audit_rule *b) 163 { 164 int i; 165 166 if (a->flags != b->flags) 167 return 1; 168 169 if (a->action != b->action) 170 return 1; 171 172 if (a->field_count != b->field_count) 173 return 1; 174 175 for (i = 0; i < a->field_count; i++) { 176 if (a->fields[i] != b->fields[i] 177 || a->values[i] != b->values[i]) 178 return 1; 179 } 180 181 for (i = 0; i < AUDIT_BITMASK_SIZE; i++) 182 if (a->mask[i] != b->mask[i]) 183 return 1; 184 185 return 0; 186 } 187 188 /* Note that audit_add_rule and audit_del_rule are called via 189 * audit_receive() in audit.c, and are protected by 190 * audit_netlink_sem. */ 191 static inline int audit_add_rule(struct audit_entry *entry, 192 struct list_head *list) 193 { 194 if (entry->rule.flags & AUDIT_PREPEND) { 195 entry->rule.flags &= ~AUDIT_PREPEND; 196 list_add_rcu(&entry->list, list); 197 } else { 198 list_add_tail_rcu(&entry->list, list); 199 } 200 return 0; 201 } 202 203 static void audit_free_rule(struct rcu_head *head) 204 { 205 struct audit_entry *e = container_of(head, struct audit_entry, rcu); 206 kfree(e); 207 } 208 209 /* Note that audit_add_rule and audit_del_rule are called via 210 * audit_receive() in audit.c, and are protected by 211 * audit_netlink_sem. */ 212 static inline int audit_del_rule(struct audit_rule *rule, 213 struct list_head *list) 214 { 215 struct audit_entry *e; 216 217 /* Do not use the _rcu iterator here, since this is the only 218 * deletion routine. */ 219 list_for_each_entry(e, list, list) { 220 if (!audit_compare_rule(rule, &e->rule)) { 221 list_del_rcu(&e->list); 222 call_rcu(&e->rcu, audit_free_rule); 223 return 0; 224 } 225 } 226 return -EFAULT; /* No matching rule */ 227 } 228 229 #ifdef CONFIG_NET 230 /* Copy rule from user-space to kernel-space. Called during 231 * AUDIT_ADD. */ 232 static int audit_copy_rule(struct audit_rule *d, struct audit_rule *s) 233 { 234 int i; 235 236 if (s->action != AUDIT_NEVER 237 && s->action != AUDIT_POSSIBLE 238 && s->action != AUDIT_ALWAYS) 239 return -1; 240 if (s->field_count < 0 || s->field_count > AUDIT_MAX_FIELDS) 241 return -1; 242 243 d->flags = s->flags; 244 d->action = s->action; 245 d->field_count = s->field_count; 246 for (i = 0; i < d->field_count; i++) { 247 d->fields[i] = s->fields[i]; 248 d->values[i] = s->values[i]; 249 } 250 for (i = 0; i < AUDIT_BITMASK_SIZE; i++) d->mask[i] = s->mask[i]; 251 return 0; 252 } 253 254 int audit_receive_filter(int type, int pid, int uid, int seq, void *data) 255 { 256 u32 flags; 257 struct audit_entry *entry; 258 int err = 0; 259 260 switch (type) { 261 case AUDIT_LIST: 262 /* The *_rcu iterators not needed here because we are 263 always called with audit_netlink_sem held. */ 264 list_for_each_entry(entry, &audit_tsklist, list) 265 audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, 266 &entry->rule, sizeof(entry->rule)); 267 list_for_each_entry(entry, &audit_entlist, list) 268 audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, 269 &entry->rule, sizeof(entry->rule)); 270 list_for_each_entry(entry, &audit_extlist, list) 271 audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, 272 &entry->rule, sizeof(entry->rule)); 273 audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); 274 break; 275 case AUDIT_ADD: 276 if (!(entry = kmalloc(sizeof(*entry), GFP_KERNEL))) 277 return -ENOMEM; 278 if (audit_copy_rule(&entry->rule, data)) { 279 kfree(entry); 280 return -EINVAL; 281 } 282 flags = entry->rule.flags; 283 if (!err && (flags & AUDIT_PER_TASK)) 284 err = audit_add_rule(entry, &audit_tsklist); 285 if (!err && (flags & AUDIT_AT_ENTRY)) 286 err = audit_add_rule(entry, &audit_entlist); 287 if (!err && (flags & AUDIT_AT_EXIT)) 288 err = audit_add_rule(entry, &audit_extlist); 289 break; 290 case AUDIT_DEL: 291 flags =((struct audit_rule *)data)->flags; 292 if (!err && (flags & AUDIT_PER_TASK)) 293 err = audit_del_rule(data, &audit_tsklist); 294 if (!err && (flags & AUDIT_AT_ENTRY)) 295 err = audit_del_rule(data, &audit_entlist); 296 if (!err && (flags & AUDIT_AT_EXIT)) 297 err = audit_del_rule(data, &audit_extlist); 298 break; 299 default: 300 return -EINVAL; 301 } 302 303 return err; 304 } 305 #endif 306 307 /* Compare a task_struct with an audit_rule. Return 1 on match, 0 308 * otherwise. */ 309 static int audit_filter_rules(struct task_struct *tsk, 310 struct audit_rule *rule, 311 struct audit_context *ctx, 312 enum audit_state *state) 313 { 314 int i, j; 315 316 for (i = 0; i < rule->field_count; i++) { 317 u32 field = rule->fields[i] & ~AUDIT_NEGATE; 318 u32 value = rule->values[i]; 319 int result = 0; 320 321 switch (field) { 322 case AUDIT_PID: 323 result = (tsk->pid == value); 324 break; 325 case AUDIT_UID: 326 result = (tsk->uid == value); 327 break; 328 case AUDIT_EUID: 329 result = (tsk->euid == value); 330 break; 331 case AUDIT_SUID: 332 result = (tsk->suid == value); 333 break; 334 case AUDIT_FSUID: 335 result = (tsk->fsuid == value); 336 break; 337 case AUDIT_GID: 338 result = (tsk->gid == value); 339 break; 340 case AUDIT_EGID: 341 result = (tsk->egid == value); 342 break; 343 case AUDIT_SGID: 344 result = (tsk->sgid == value); 345 break; 346 case AUDIT_FSGID: 347 result = (tsk->fsgid == value); 348 break; 349 case AUDIT_PERS: 350 result = (tsk->personality == value); 351 break; 352 case AUDIT_ARCH: 353 if (ctx) 354 result = (ctx->arch == value); 355 break; 356 357 case AUDIT_EXIT: 358 if (ctx && ctx->return_valid) 359 result = (ctx->return_code == value); 360 break; 361 case AUDIT_SUCCESS: 362 if (ctx && ctx->return_valid) 363 result = (ctx->return_valid == AUDITSC_SUCCESS); 364 break; 365 case AUDIT_DEVMAJOR: 366 if (ctx) { 367 for (j = 0; j < ctx->name_count; j++) { 368 if (MAJOR(ctx->names[j].dev)==value) { 369 ++result; 370 break; 371 } 372 } 373 } 374 break; 375 case AUDIT_DEVMINOR: 376 if (ctx) { 377 for (j = 0; j < ctx->name_count; j++) { 378 if (MINOR(ctx->names[j].dev)==value) { 379 ++result; 380 break; 381 } 382 } 383 } 384 break; 385 case AUDIT_INODE: 386 if (ctx) { 387 for (j = 0; j < ctx->name_count; j++) { 388 if (ctx->names[j].ino == value) { 389 ++result; 390 break; 391 } 392 } 393 } 394 break; 395 case AUDIT_LOGINUID: 396 result = 0; 397 if (ctx) 398 result = (ctx->loginuid == value); 399 break; 400 case AUDIT_ARG0: 401 case AUDIT_ARG1: 402 case AUDIT_ARG2: 403 case AUDIT_ARG3: 404 if (ctx) 405 result = (ctx->argv[field-AUDIT_ARG0]==value); 406 break; 407 } 408 409 if (rule->fields[i] & AUDIT_NEGATE) 410 result = !result; 411 if (!result) 412 return 0; 413 } 414 switch (rule->action) { 415 case AUDIT_NEVER: *state = AUDIT_DISABLED; break; 416 case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT; break; 417 case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; 418 } 419 return 1; 420 } 421 422 /* At process creation time, we can determine if system-call auditing is 423 * completely disabled for this task. Since we only have the task 424 * structure at this point, we can only check uid and gid. 425 */ 426 static enum audit_state audit_filter_task(struct task_struct *tsk) 427 { 428 struct audit_entry *e; 429 enum audit_state state; 430 431 rcu_read_lock(); 432 list_for_each_entry_rcu(e, &audit_tsklist, list) { 433 if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { 434 rcu_read_unlock(); 435 return state; 436 } 437 } 438 rcu_read_unlock(); 439 return AUDIT_BUILD_CONTEXT; 440 } 441 442 /* At syscall entry and exit time, this filter is called if the 443 * audit_state is not low enough that auditing cannot take place, but is 444 * also not high enough that we already know we have to write and audit 445 * record (i.e., the state is AUDIT_SETUP_CONTEXT or AUDIT_BUILD_CONTEXT). 446 */ 447 static enum audit_state audit_filter_syscall(struct task_struct *tsk, 448 struct audit_context *ctx, 449 struct list_head *list) 450 { 451 struct audit_entry *e; 452 enum audit_state state; 453 int word = AUDIT_WORD(ctx->major); 454 int bit = AUDIT_BIT(ctx->major); 455 456 rcu_read_lock(); 457 list_for_each_entry_rcu(e, list, list) { 458 if ((e->rule.mask[word] & bit) == bit 459 && audit_filter_rules(tsk, &e->rule, ctx, &state)) { 460 rcu_read_unlock(); 461 return state; 462 } 463 } 464 rcu_read_unlock(); 465 return AUDIT_BUILD_CONTEXT; 466 } 467 468 /* This should be called with task_lock() held. */ 469 static inline struct audit_context *audit_get_context(struct task_struct *tsk, 470 int return_valid, 471 int return_code) 472 { 473 struct audit_context *context = tsk->audit_context; 474 475 if (likely(!context)) 476 return NULL; 477 context->return_valid = return_valid; 478 context->return_code = return_code; 479 480 if (context->in_syscall && !context->auditable) { 481 enum audit_state state; 482 state = audit_filter_syscall(tsk, context, &audit_extlist); 483 if (state == AUDIT_RECORD_CONTEXT) 484 context->auditable = 1; 485 } 486 487 context->pid = tsk->pid; 488 context->uid = tsk->uid; 489 context->gid = tsk->gid; 490 context->euid = tsk->euid; 491 context->suid = tsk->suid; 492 context->fsuid = tsk->fsuid; 493 context->egid = tsk->egid; 494 context->sgid = tsk->sgid; 495 context->fsgid = tsk->fsgid; 496 context->personality = tsk->personality; 497 tsk->audit_context = NULL; 498 return context; 499 } 500 501 static inline void audit_free_names(struct audit_context *context) 502 { 503 int i; 504 505 #if AUDIT_DEBUG == 2 506 if (context->auditable 507 ||context->put_count + context->ino_count != context->name_count) { 508 printk(KERN_ERR "audit.c:%d(:%d): major=%d in_syscall=%d" 509 " name_count=%d put_count=%d" 510 " ino_count=%d [NOT freeing]\n", 511 __LINE__, 512 context->serial, context->major, context->in_syscall, 513 context->name_count, context->put_count, 514 context->ino_count); 515 for (i = 0; i < context->name_count; i++) 516 printk(KERN_ERR "names[%d] = %p = %s\n", i, 517 context->names[i].name, 518 context->names[i].name); 519 dump_stack(); 520 return; 521 } 522 #endif 523 #if AUDIT_DEBUG 524 context->put_count = 0; 525 context->ino_count = 0; 526 #endif 527 528 for (i = 0; i < context->name_count; i++) 529 if (context->names[i].name) 530 __putname(context->names[i].name); 531 context->name_count = 0; 532 } 533 534 static inline void audit_free_aux(struct audit_context *context) 535 { 536 struct audit_aux_data *aux; 537 538 while ((aux = context->aux)) { 539 context->aux = aux->next; 540 kfree(aux); 541 } 542 } 543 544 static inline void audit_zero_context(struct audit_context *context, 545 enum audit_state state) 546 { 547 uid_t loginuid = context->loginuid; 548 549 memset(context, 0, sizeof(*context)); 550 context->state = state; 551 context->loginuid = loginuid; 552 } 553 554 static inline struct audit_context *audit_alloc_context(enum audit_state state) 555 { 556 struct audit_context *context; 557 558 if (!(context = kmalloc(sizeof(*context), GFP_KERNEL))) 559 return NULL; 560 audit_zero_context(context, state); 561 return context; 562 } 563 564 /* Filter on the task information and allocate a per-task audit context 565 * if necessary. Doing so turns on system call auditing for the 566 * specified task. This is called from copy_process, so no lock is 567 * needed. */ 568 int audit_alloc(struct task_struct *tsk) 569 { 570 struct audit_context *context; 571 enum audit_state state; 572 573 if (likely(!audit_enabled)) 574 return 0; /* Return if not auditing. */ 575 576 state = audit_filter_task(tsk); 577 if (likely(state == AUDIT_DISABLED)) 578 return 0; 579 580 if (!(context = audit_alloc_context(state))) { 581 audit_log_lost("out of memory in audit_alloc"); 582 return -ENOMEM; 583 } 584 585 /* Preserve login uid */ 586 context->loginuid = -1; 587 if (current->audit_context) 588 context->loginuid = current->audit_context->loginuid; 589 590 tsk->audit_context = context; 591 set_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT); 592 return 0; 593 } 594 595 static inline void audit_free_context(struct audit_context *context) 596 { 597 struct audit_context *previous; 598 int count = 0; 599 600 do { 601 previous = context->previous; 602 if (previous || (count && count < 10)) { 603 ++count; 604 printk(KERN_ERR "audit(:%d): major=%d name_count=%d:" 605 " freeing multiple contexts (%d)\n", 606 context->serial, context->major, 607 context->name_count, count); 608 } 609 audit_free_names(context); 610 audit_free_aux(context); 611 kfree(context); 612 context = previous; 613 } while (context); 614 if (count >= 10) 615 printk(KERN_ERR "audit: freed %d contexts\n", count); 616 } 617 618 static void audit_log_task_info(struct audit_buffer *ab) 619 { 620 char name[sizeof(current->comm)]; 621 struct mm_struct *mm = current->mm; 622 struct vm_area_struct *vma; 623 624 get_task_comm(name, current); 625 audit_log_format(ab, " comm=%s", name); 626 627 if (!mm) 628 return; 629 630 down_read(&mm->mmap_sem); 631 vma = mm->mmap; 632 while (vma) { 633 if ((vma->vm_flags & VM_EXECUTABLE) && 634 vma->vm_file) { 635 audit_log_d_path(ab, "exe=", 636 vma->vm_file->f_dentry, 637 vma->vm_file->f_vfsmnt); 638 break; 639 } 640 vma = vma->vm_next; 641 } 642 up_read(&mm->mmap_sem); 643 } 644 645 static void audit_log_exit(struct audit_context *context) 646 { 647 int i; 648 struct audit_buffer *ab; 649 650 ab = audit_log_start(context); 651 if (!ab) 652 return; /* audit_panic has been called */ 653 audit_log_format(ab, "syscall=%d", context->major); 654 if (context->personality != PER_LINUX) 655 audit_log_format(ab, " per=%lx", context->personality); 656 audit_log_format(ab, " arch=%x", context->arch); 657 if (context->return_valid) 658 audit_log_format(ab, " success=%s exit=%ld", 659 (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", 660 context->return_code); 661 audit_log_format(ab, 662 " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" 663 " pid=%d loginuid=%d uid=%d gid=%d" 664 " euid=%d suid=%d fsuid=%d" 665 " egid=%d sgid=%d fsgid=%d", 666 context->argv[0], 667 context->argv[1], 668 context->argv[2], 669 context->argv[3], 670 context->name_count, 671 context->pid, 672 context->loginuid, 673 context->uid, 674 context->gid, 675 context->euid, context->suid, context->fsuid, 676 context->egid, context->sgid, context->fsgid); 677 audit_log_task_info(ab); 678 audit_log_end(ab); 679 while (context->aux) { 680 struct audit_aux_data *aux; 681 682 ab = audit_log_start(context); 683 if (!ab) 684 continue; /* audit_panic has been called */ 685 686 aux = context->aux; 687 context->aux = aux->next; 688 689 audit_log_format(ab, "auxitem=%d", aux->type); 690 switch (aux->type) { 691 case AUDIT_AUX_IPCPERM: { 692 struct audit_aux_data_ipcctl *axi = (void *)aux; 693 audit_log_format(ab, 694 " qbytes=%lx uid=%d gid=%d mode=%x", 695 axi->qbytes, axi->uid, axi->gid, axi->mode); 696 } 697 } 698 audit_log_end(ab); 699 kfree(aux); 700 } 701 702 for (i = 0; i < context->name_count; i++) { 703 ab = audit_log_start(context); 704 if (!ab) 705 continue; /* audit_panic has been called */ 706 audit_log_format(ab, "item=%d", i); 707 if (context->names[i].name) { 708 audit_log_format(ab, " name="); 709 audit_log_untrustedstring(ab, context->names[i].name); 710 } 711 if (context->names[i].ino != (unsigned long)-1) 712 audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#o" 713 " uid=%d gid=%d rdev=%02x:%02x", 714 context->names[i].ino, 715 MAJOR(context->names[i].dev), 716 MINOR(context->names[i].dev), 717 context->names[i].mode, 718 context->names[i].uid, 719 context->names[i].gid, 720 MAJOR(context->names[i].rdev), 721 MINOR(context->names[i].rdev)); 722 audit_log_end(ab); 723 } 724 } 725 726 /* Free a per-task audit context. Called from copy_process and 727 * __put_task_struct. */ 728 void audit_free(struct task_struct *tsk) 729 { 730 struct audit_context *context; 731 732 task_lock(tsk); 733 context = audit_get_context(tsk, 0, 0); 734 task_unlock(tsk); 735 736 if (likely(!context)) 737 return; 738 739 /* Check for system calls that do not go through the exit 740 * function (e.g., exit_group), then free context block. */ 741 if (context->in_syscall && context->auditable) 742 audit_log_exit(context); 743 744 audit_free_context(context); 745 } 746 747 /* Compute a serial number for the audit record. Audit records are 748 * written to user-space as soon as they are generated, so a complete 749 * audit record may be written in several pieces. The timestamp of the 750 * record and this serial number are used by the user-space daemon to 751 * determine which pieces belong to the same audit record. The 752 * (timestamp,serial) tuple is unique for each syscall and is live from 753 * syscall entry to syscall exit. 754 * 755 * Atomic values are only guaranteed to be 24-bit, so we count down. 756 * 757 * NOTE: Another possibility is to store the formatted records off the 758 * audit context (for those records that have a context), and emit them 759 * all at syscall exit. However, this could delay the reporting of 760 * significant errors until syscall exit (or never, if the system 761 * halts). */ 762 static inline unsigned int audit_serial(void) 763 { 764 static atomic_t serial = ATOMIC_INIT(0xffffff); 765 unsigned int a, b; 766 767 do { 768 a = atomic_read(&serial); 769 if (atomic_dec_and_test(&serial)) 770 atomic_set(&serial, 0xffffff); 771 b = atomic_read(&serial); 772 } while (b != a - 1); 773 774 return 0xffffff - b; 775 } 776 777 /* Fill in audit context at syscall entry. This only happens if the 778 * audit context was created when the task was created and the state or 779 * filters demand the audit context be built. If the state from the 780 * per-task filter or from the per-syscall filter is AUDIT_RECORD_CONTEXT, 781 * then the record will be written at syscall exit time (otherwise, it 782 * will only be written if another part of the kernel requests that it 783 * be written). */ 784 void audit_syscall_entry(struct task_struct *tsk, int arch, int major, 785 unsigned long a1, unsigned long a2, 786 unsigned long a3, unsigned long a4) 787 { 788 struct audit_context *context = tsk->audit_context; 789 enum audit_state state; 790 791 BUG_ON(!context); 792 793 /* This happens only on certain architectures that make system 794 * calls in kernel_thread via the entry.S interface, instead of 795 * with direct calls. (If you are porting to a new 796 * architecture, hitting this condition can indicate that you 797 * got the _exit/_leave calls backward in entry.S.) 798 * 799 * i386 no 800 * x86_64 no 801 * ppc64 yes (see arch/ppc64/kernel/misc.S) 802 * 803 * This also happens with vm86 emulation in a non-nested manner 804 * (entries without exits), so this case must be caught. 805 */ 806 if (context->in_syscall) { 807 struct audit_context *newctx; 808 809 #if defined(__NR_vm86) && defined(__NR_vm86old) 810 /* vm86 mode should only be entered once */ 811 if (major == __NR_vm86 || major == __NR_vm86old) 812 return; 813 #endif 814 #if AUDIT_DEBUG 815 printk(KERN_ERR 816 "audit(:%d) pid=%d in syscall=%d;" 817 " entering syscall=%d\n", 818 context->serial, tsk->pid, context->major, major); 819 #endif 820 newctx = audit_alloc_context(context->state); 821 if (newctx) { 822 newctx->previous = context; 823 context = newctx; 824 tsk->audit_context = newctx; 825 } else { 826 /* If we can't alloc a new context, the best we 827 * can do is to leak memory (any pending putname 828 * will be lost). The only other alternative is 829 * to abandon auditing. */ 830 audit_zero_context(context, context->state); 831 } 832 } 833 BUG_ON(context->in_syscall || context->name_count); 834 835 if (!audit_enabled) 836 return; 837 838 context->arch = arch; 839 context->major = major; 840 context->argv[0] = a1; 841 context->argv[1] = a2; 842 context->argv[2] = a3; 843 context->argv[3] = a4; 844 845 state = context->state; 846 if (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT) 847 state = audit_filter_syscall(tsk, context, &audit_entlist); 848 if (likely(state == AUDIT_DISABLED)) 849 return; 850 851 context->serial = audit_serial(); 852 context->ctime = CURRENT_TIME; 853 context->in_syscall = 1; 854 context->auditable = !!(state == AUDIT_RECORD_CONTEXT); 855 } 856 857 /* Tear down after system call. If the audit context has been marked as 858 * auditable (either because of the AUDIT_RECORD_CONTEXT state from 859 * filtering, or because some other part of the kernel write an audit 860 * message), then write out the syscall information. In call cases, 861 * free the names stored from getname(). */ 862 void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) 863 { 864 struct audit_context *context; 865 866 get_task_struct(tsk); 867 task_lock(tsk); 868 context = audit_get_context(tsk, valid, return_code); 869 task_unlock(tsk); 870 871 /* Not having a context here is ok, since the parent may have 872 * called __put_task_struct. */ 873 if (likely(!context)) 874 return; 875 876 if (context->in_syscall && context->auditable) 877 audit_log_exit(context); 878 879 context->in_syscall = 0; 880 context->auditable = 0; 881 882 if (context->previous) { 883 struct audit_context *new_context = context->previous; 884 context->previous = NULL; 885 audit_free_context(context); 886 tsk->audit_context = new_context; 887 } else { 888 audit_free_names(context); 889 audit_free_aux(context); 890 audit_zero_context(context, context->state); 891 tsk->audit_context = context; 892 } 893 put_task_struct(tsk); 894 } 895 896 /* Add a name to the list. Called from fs/namei.c:getname(). */ 897 void audit_getname(const char *name) 898 { 899 struct audit_context *context = current->audit_context; 900 901 if (!context || IS_ERR(name) || !name) 902 return; 903 904 if (!context->in_syscall) { 905 #if AUDIT_DEBUG == 2 906 printk(KERN_ERR "%s:%d(:%d): ignoring getname(%p)\n", 907 __FILE__, __LINE__, context->serial, name); 908 dump_stack(); 909 #endif 910 return; 911 } 912 BUG_ON(context->name_count >= AUDIT_NAMES); 913 context->names[context->name_count].name = name; 914 context->names[context->name_count].ino = (unsigned long)-1; 915 ++context->name_count; 916 } 917 918 /* Intercept a putname request. Called from 919 * include/linux/fs.h:putname(). If we have stored the name from 920 * getname in the audit context, then we delay the putname until syscall 921 * exit. */ 922 void audit_putname(const char *name) 923 { 924 struct audit_context *context = current->audit_context; 925 926 BUG_ON(!context); 927 if (!context->in_syscall) { 928 #if AUDIT_DEBUG == 2 929 printk(KERN_ERR "%s:%d(:%d): __putname(%p)\n", 930 __FILE__, __LINE__, context->serial, name); 931 if (context->name_count) { 932 int i; 933 for (i = 0; i < context->name_count; i++) 934 printk(KERN_ERR "name[%d] = %p = %s\n", i, 935 context->names[i].name, 936 context->names[i].name); 937 } 938 #endif 939 __putname(name); 940 } 941 #if AUDIT_DEBUG 942 else { 943 ++context->put_count; 944 if (context->put_count > context->name_count) { 945 printk(KERN_ERR "%s:%d(:%d): major=%d" 946 " in_syscall=%d putname(%p) name_count=%d" 947 " put_count=%d\n", 948 __FILE__, __LINE__, 949 context->serial, context->major, 950 context->in_syscall, name, context->name_count, 951 context->put_count); 952 dump_stack(); 953 } 954 } 955 #endif 956 } 957 958 /* Store the inode and device from a lookup. Called from 959 * fs/namei.c:path_lookup(). */ 960 void audit_inode(const char *name, const struct inode *inode) 961 { 962 int idx; 963 struct audit_context *context = current->audit_context; 964 965 if (!context->in_syscall) 966 return; 967 if (context->name_count 968 && context->names[context->name_count-1].name 969 && context->names[context->name_count-1].name == name) 970 idx = context->name_count - 1; 971 else if (context->name_count > 1 972 && context->names[context->name_count-2].name 973 && context->names[context->name_count-2].name == name) 974 idx = context->name_count - 2; 975 else { 976 /* FIXME: how much do we care about inodes that have no 977 * associated name? */ 978 if (context->name_count >= AUDIT_NAMES - AUDIT_NAMES_RESERVED) 979 return; 980 idx = context->name_count++; 981 context->names[idx].name = NULL; 982 #if AUDIT_DEBUG 983 ++context->ino_count; 984 #endif 985 } 986 context->names[idx].ino = inode->i_ino; 987 context->names[idx].dev = inode->i_sb->s_dev; 988 context->names[idx].mode = inode->i_mode; 989 context->names[idx].uid = inode->i_uid; 990 context->names[idx].gid = inode->i_gid; 991 context->names[idx].rdev = inode->i_rdev; 992 } 993 994 void audit_get_stamp(struct audit_context *ctx, 995 struct timespec *t, unsigned int *serial) 996 { 997 if (ctx) { 998 t->tv_sec = ctx->ctime.tv_sec; 999 t->tv_nsec = ctx->ctime.tv_nsec; 1000 *serial = ctx->serial; 1001 ctx->auditable = 1; 1002 } else { 1003 *t = CURRENT_TIME; 1004 *serial = 0; 1005 } 1006 } 1007 1008 extern int audit_set_type(struct audit_buffer *ab, int type); 1009 1010 int audit_set_loginuid(struct audit_context *ctx, uid_t loginuid) 1011 { 1012 if (ctx) { 1013 struct audit_buffer *ab; 1014 1015 ab = audit_log_start(NULL); 1016 if (ab) { 1017 audit_log_format(ab, "login pid=%d uid=%u " 1018 "old loginuid=%u new loginuid=%u", 1019 ctx->pid, ctx->uid, ctx->loginuid, loginuid); 1020 audit_set_type(ab, AUDIT_LOGIN); 1021 audit_log_end(ab); 1022 } 1023 ctx->loginuid = loginuid; 1024 } 1025 return 0; 1026 } 1027 1028 uid_t audit_get_loginuid(struct audit_context *ctx) 1029 { 1030 return ctx ? ctx->loginuid : -1; 1031 } 1032 1033 int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) 1034 { 1035 struct audit_aux_data_ipcctl *ax; 1036 struct audit_context *context = current->audit_context; 1037 1038 if (likely(!context)) 1039 return 0; 1040 1041 ax = kmalloc(sizeof(*ax), GFP_KERNEL); 1042 if (!ax) 1043 return -ENOMEM; 1044 1045 ax->qbytes = qbytes; 1046 ax->uid = uid; 1047 ax->gid = gid; 1048 ax->mode = mode; 1049 1050 ax->d.type = AUDIT_AUX_IPCPERM; 1051 ax->d.next = context->aux; 1052 context->aux = (void *)ax; 1053 return 0; 1054 } 1055