1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 30 * $FreeBSD: src/sys/kern/kern_ktrace.c,v 1.35.2.6 2002/07/05 22:36:38 darrenr Exp $ 31 */ 32 33 #include "opt_ktrace.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/uio.h> 38 #include <sys/sysmsg.h> 39 #include <sys/kernel.h> 40 #include <sys/proc.h> 41 #include <sys/fcntl.h> 42 #include <sys/lock.h> 43 #include <sys/nlookup.h> 44 #include <sys/vnode.h> 45 #include <sys/ktrace.h> 46 #include <sys/malloc.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 #include <sys/sysent.h> 50 51 #include <vm/vm_zone.h> 52 53 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 54 55 #ifdef KTRACE 56 static void ktrgetheader (struct ktr_header *kth, int type); 57 static struct ktr_syscall *ktrgetsyscall(struct ktr_header *kth, 58 struct ktr_syscall *ktp_cache, int narg); 59 static void ktrputsyscall(struct ktr_syscall *ktp_cache, 60 struct ktr_syscall *ktp); 61 static void ktrwrite (struct lwp *, struct ktr_header *, struct uio *); 62 static int ktrcanset (struct thread *,struct proc *); 63 static int ktrsetchildren (struct thread *, struct proc *, 64 int, int, ktrace_node_t); 65 static int ktrops (struct thread *,struct proc *,int,int, ktrace_node_t); 66 67 /* 68 * MPSAFE 69 */ 70 static 71 void 72 ktrgetheader(struct ktr_header *kth, int type) 73 { 74 thread_t td = curthread; 75 struct proc *p = td->td_proc; 76 struct lwp *lp = td->td_lwp; 77 78 kth->ktr_type = type; 79 /* XXX threaded flag is a hack at the moment */ 80 kth->ktr_flags = (p->p_nthreads > 1) ? KTRH_THREADED : 0; 81 kth->ktr_flags |= KTRH_CPUID_ENCODE(td->td_gd->gd_cpuid); 82 /*microtime(&kth->ktr_time); set in ktrwrite */ 83 kth->ktr_pid = p->p_pid; 84 kth->ktr_tid = lp->lwp_tid; 85 bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN + 1); 86 } 87 88 static 89 struct ktr_syscall * 90 ktrgetsyscall(struct ktr_header *kth, struct ktr_syscall *ktp_cache, int narg) 91 { 92 size_t len; 93 94 len = offsetof(struct ktr_syscall, ktr_args[narg]); 95 if (len > sizeof(*ktp_cache)) 96 ktp_cache = kmalloc(len, M_KTRACE, M_WAITOK); 97 kth->ktr_buf = (caddr_t)ktp_cache; 98 kth->ktr_len = (int)len; 99 return (ktp_cache); 100 } 101 102 static 103 void 104 ktrputsyscall(struct ktr_syscall *ktp_cache, struct ktr_syscall *ktp) 105 { 106 if (ktp != ktp_cache) 107 kfree(ktp, M_KTRACE); 108 } 109 110 void 111 ktrsyscall(struct lwp *lp, int code, int narg, union sysunion *uap) 112 { 113 struct ktr_header kth; 114 struct ktr_syscall ktp_cache; 115 struct ktr_syscall *ktp; 116 register_t *argp; 117 register_t *args = (void *)uap; 118 int i; 119 120 /* 121 * Setting the active bit prevents a ktrace recursion from the 122 * ktracing op itself. 123 */ 124 lp->lwp_traceflag |= KTRFAC_ACTIVE; 125 ktrgetheader(&kth, KTR_SYSCALL); 126 127 ktp = ktrgetsyscall(&kth, &ktp_cache, narg); 128 ktp->ktr_code = code; 129 ktp->ktr_narg = narg; 130 argp = &ktp->ktr_args[0]; 131 for (i = 0; i < narg; i++) 132 *argp++ = args[i]; 133 ktrwrite(lp, &kth, NULL); 134 135 ktrputsyscall(&ktp_cache, ktp); 136 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 137 } 138 139 void 140 ktrsysret(struct lwp *lp, int code, int error, register_t retval) 141 { 142 struct ktr_header kth; 143 struct ktr_sysret ktp; 144 145 lp->lwp_traceflag |= KTRFAC_ACTIVE; 146 ktrgetheader(&kth, KTR_SYSRET); 147 148 ktp.ktr_code = code; 149 ktp.ktr_error = error; 150 if (error == 0) 151 ktp.ktr_retval = retval; /* what about val2 ? */ 152 else 153 ktp.ktr_retval = 0; 154 155 kth.ktr_buf = (caddr_t)&ktp; 156 kth.ktr_len = (int)sizeof(struct ktr_sysret); 157 158 ktrwrite(lp, &kth, NULL); 159 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 160 } 161 162 void 163 ktrnamei(struct lwp *lp, char *path) 164 { 165 struct ktr_header kth; 166 167 lp->lwp_traceflag |= KTRFAC_ACTIVE; 168 ktrgetheader(&kth, KTR_NAMEI); 169 170 kth.ktr_len = (int)strlen(path); 171 kth.ktr_buf = path; 172 173 ktrwrite(lp, &kth, NULL); 174 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 175 } 176 177 void 178 ktrsysctl(struct lwp *lp, int *name, u_int namelen) 179 { 180 struct ktr_header kth; 181 u_int mib[CTL_MAXNAME + 2]; 182 char *mibname; 183 size_t mibnamelen; 184 int error; 185 186 lp->lwp_traceflag |= KTRFAC_ACTIVE; 187 /* Lookup name of mib. */ 188 KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 189 mib[0] = 0; 190 mib[1] = 1; 191 bcopy(name, mib + 2, namelen * sizeof(*name)); 192 mibnamelen = 128; 193 mibname = kmalloc(mibnamelen, M_KTRACE, M_WAITOK); 194 error = kernel_sysctl(mib, namelen + 2, mibname, &mibnamelen, 195 NULL, 0, &mibnamelen); 196 if (error) { 197 kfree(mibname, M_KTRACE); 198 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 199 return; 200 } 201 ktrgetheader(&kth, KTR_SYSCTL); 202 kth.ktr_len = mibnamelen; 203 kth.ktr_buf = mibname; 204 ktrwrite(lp, &kth, NULL); 205 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 206 } 207 208 void 209 ktrgenio(struct lwp *lp, int fd, enum uio_rw rw, struct uio *uio, int error) 210 { 211 struct ktr_header kth; 212 struct ktr_genio ktg; 213 214 if (error) 215 return; 216 lp->lwp_traceflag |= KTRFAC_ACTIVE; 217 ktrgetheader(&kth, KTR_GENIO); 218 219 ktg.ktr_fd = fd; 220 ktg.ktr_rw = rw; 221 kth.ktr_buf = (caddr_t)&ktg; 222 kth.ktr_len = (int)sizeof(struct ktr_genio); 223 uio->uio_offset = 0; 224 uio->uio_rw = UIO_WRITE; 225 226 ktrwrite(lp, &kth, uio); 227 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 228 } 229 230 void 231 ktrpsig(struct lwp *lp, int sig, sig_t action, sigset_t *mask, int code) 232 { 233 struct ktr_header kth; 234 struct ktr_psig kp; 235 236 lp->lwp_traceflag |= KTRFAC_ACTIVE; 237 ktrgetheader(&kth, KTR_PSIG); 238 239 kp.signo = (char)sig; 240 kp.action = action; 241 kp.mask = *mask; 242 kp.code = code; 243 kth.ktr_buf = (caddr_t)&kp; 244 kth.ktr_len = (int)sizeof(struct ktr_psig); 245 246 ktrwrite(lp, &kth, NULL); 247 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 248 } 249 250 void 251 ktrcsw(struct lwp *lp, int out, int user) 252 { 253 struct ktr_header kth; 254 struct ktr_csw kc; 255 256 lp->lwp_traceflag |= KTRFAC_ACTIVE; 257 ktrgetheader(&kth, KTR_CSW); 258 259 kc.out = out; 260 kc.user = user; 261 kth.ktr_buf = (caddr_t)&kc; 262 kth.ktr_len = (int)sizeof(struct ktr_csw); 263 264 ktrwrite(lp, &kth, NULL); 265 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 266 } 267 #endif 268 269 /* Interface and common routines */ 270 271 #ifdef KTRACE 272 /* 273 * ktrace system call 274 */ 275 struct ktrace_clear_info { 276 ktrace_node_t tracenode; 277 int rootclear; 278 int error; 279 }; 280 281 static int ktrace_clear_callback(struct proc *p, void *data); 282 283 #endif 284 285 /* 286 * MPALMOSTSAFE 287 */ 288 int 289 sys_ktrace(struct sysmsg *sysmsg, const struct ktrace_args *uap) 290 { 291 #ifdef KTRACE 292 struct ktrace_clear_info info; 293 struct thread *td = curthread; 294 struct proc *curp = td->td_proc; 295 struct proc *p; 296 struct pgrp *pg; 297 int facs = uap->facs & ~KTRFAC_ROOT; 298 int ops = KTROP(uap->ops); 299 int descend = uap->ops & KTRFLAG_DESCEND; 300 int ret = 0; 301 int error = 0; 302 struct nlookupdata nd; 303 ktrace_node_t tracenode = NULL; 304 305 lwkt_gettoken(&curp->p_token); 306 curp->p_traceflag |= KTRFAC_ACTIVE; 307 308 if (ops != KTROP_CLEAR) { 309 /* 310 * an operation which requires a file argument. 311 */ 312 error = nlookup_init(&nd, uap->fname, 313 UIO_USERSPACE, NLC_LOCKVP); 314 if (error == 0) 315 error = vn_open(&nd, NULL, FREAD|FWRITE|O_NOFOLLOW, 0); 316 if (error == 0 && nd.nl_open_vp->v_type != VREG) 317 error = EACCES; 318 if (error) { 319 curp->p_traceflag &= ~KTRFAC_ACTIVE; 320 nlookup_done(&nd); 321 goto done; 322 } 323 tracenode = kmalloc(sizeof(struct ktrace_node), M_KTRACE, 324 M_WAITOK | M_ZERO); 325 tracenode->kn_vp = nd.nl_open_vp; 326 tracenode->kn_refs = 1; 327 nd.nl_open_vp = NULL; 328 nlookup_done(&nd); 329 vn_unlock(tracenode->kn_vp); 330 } 331 /* 332 * Clear all uses of the tracefile. Not the most efficient operation 333 * in the world. 334 */ 335 if (ops == KTROP_CLEARFILE) { 336 info.tracenode = tracenode; 337 info.error = 0; 338 info.rootclear = 0; 339 allproc_scan(ktrace_clear_callback, &info, 0); 340 error = info.error; 341 goto done; 342 } 343 /* 344 * need something to (un)trace (XXX - why is this here?) 345 */ 346 if (!facs) { 347 error = EINVAL; 348 goto done; 349 } 350 /* 351 * do it 352 */ 353 if (uap->pid < 0) { 354 /* 355 * By process group. Process group is referenced, preventing 356 * disposal. 357 */ 358 pg = pgfind(-uap->pid); 359 if (pg == NULL) { 360 error = ESRCH; 361 goto done; 362 } 363 lwkt_gettoken(&pg->pg_token); 364 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 365 PHOLD(p); 366 if (descend) 367 ret |= ktrsetchildren(td, p, ops, facs, tracenode); 368 else 369 ret |= ktrops(td, p, ops, facs, tracenode); 370 PRELE(p); 371 } 372 lwkt_reltoken(&pg->pg_token); 373 pgrel(pg); 374 } else { 375 /* 376 * by pid 377 */ 378 p = pfind(uap->pid); 379 if (p == NULL) { 380 error = ESRCH; 381 goto done; 382 } 383 if (descend) 384 ret |= ktrsetchildren(td, p, ops, facs, tracenode); 385 else 386 ret |= ktrops(td, p, ops, facs, tracenode); 387 PRELE(p); 388 } 389 if (!ret) 390 error = EPERM; 391 done: 392 if (tracenode) 393 ktrdestroy(&tracenode); 394 curp->p_traceflag &= ~KTRFAC_ACTIVE; 395 lwkt_reltoken(&curp->p_token); 396 return (error); 397 #else 398 return ENOSYS; 399 #endif 400 } 401 402 #ifdef KTRACE 403 404 /* 405 * NOTE: NOT MPSAFE (yet) 406 */ 407 static int 408 ktrace_clear_callback(struct proc *p, void *data) 409 { 410 struct ktrace_clear_info *info = data; 411 412 if (p->p_tracenode) { 413 if (info->rootclear) { 414 if (p->p_tracenode == info->tracenode) { 415 ktrdestroy(&p->p_tracenode); 416 p->p_traceflag = 0; 417 } 418 } else { 419 if (p->p_tracenode->kn_vp == info->tracenode->kn_vp) { 420 if (ktrcanset(curthread, p)) { 421 ktrdestroy(&p->p_tracenode); 422 p->p_traceflag = 0; 423 } else { 424 info->error = EPERM; 425 } 426 } 427 } 428 } 429 return(0); 430 } 431 432 #endif 433 434 /* 435 * utrace system call 436 * 437 * MPALMOSTSAFE 438 */ 439 int 440 sys_utrace(struct sysmsg *sysmsg, const struct utrace_args *uap) 441 { 442 #ifdef KTRACE 443 struct ktr_header kth; 444 struct thread *td = curthread; /* XXX */ 445 char cp_cache[64]; 446 caddr_t cp; 447 448 if (!KTRPOINT(td, KTR_USER)) 449 return (0); 450 if (uap->len > KTR_USER_MAXLEN) 451 return (EINVAL); 452 td->td_lwp->lwp_traceflag |= KTRFAC_ACTIVE; 453 ktrgetheader(&kth, KTR_USER); 454 if (uap->len <= sizeof(cp_cache)) 455 cp = cp_cache; 456 else 457 cp = kmalloc(uap->len, M_KTRACE, M_WAITOK); 458 459 if (!copyin(uap->addr, cp, uap->len)) { 460 kth.ktr_buf = cp; 461 kth.ktr_len = uap->len; 462 ktrwrite(td->td_lwp, &kth, NULL); 463 } 464 if (cp != cp_cache) 465 kfree(cp, M_KTRACE); 466 td->td_lwp->lwp_traceflag &= ~KTRFAC_ACTIVE; 467 468 return (0); 469 #else 470 return (ENOSYS); 471 #endif 472 } 473 474 void 475 ktrdestroy(struct ktrace_node **tracenodep) 476 { 477 ktrace_node_t tracenode; 478 479 if ((tracenode = *tracenodep) != NULL) { 480 *tracenodep = NULL; 481 KKASSERT(tracenode->kn_refs > 0); 482 if (atomic_fetchadd_int(&tracenode->kn_refs, -1) == 1) { 483 vn_close(tracenode->kn_vp, FREAD|FWRITE, NULL); 484 tracenode->kn_vp = NULL; 485 kfree(tracenode, M_KTRACE); 486 } 487 } 488 } 489 490 /* 491 * This allows a process to inherit a ref on a tracenode and is also used 492 * as a temporary ref to prevent a tracenode from being destroyed out from 493 * under an active operation. 494 */ 495 ktrace_node_t 496 ktrinherit(ktrace_node_t tracenode) 497 { 498 if (tracenode) { 499 KKASSERT(tracenode->kn_refs > 0); 500 atomic_add_int(&tracenode->kn_refs, 1); 501 } 502 return(tracenode); 503 } 504 505 #ifdef KTRACE 506 static int 507 ktrops(struct thread *td, struct proc *p, int ops, int facs, 508 ktrace_node_t tracenode) 509 { 510 ktrace_node_t oldnode; 511 512 if (!ktrcanset(td, p)) 513 return (0); 514 if (ops == KTROP_SET) { 515 if ((oldnode = p->p_tracenode) != tracenode) { 516 p->p_tracenode = ktrinherit(tracenode); 517 ktrdestroy(&oldnode); 518 } 519 p->p_traceflag |= facs; 520 if (td->td_ucred->cr_uid == 0) 521 p->p_traceflag |= KTRFAC_ROOT; 522 } else { 523 /* KTROP_CLEAR */ 524 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 525 /* no more tracing */ 526 p->p_traceflag = 0; 527 ktrdestroy(&p->p_tracenode); 528 } 529 } 530 531 return (1); 532 } 533 534 static int 535 ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs, 536 ktrace_node_t tracenode) 537 { 538 struct proc *p; 539 struct proc *np; 540 int ret = 0; 541 542 p = top; 543 PHOLD(p); 544 lwkt_gettoken(&p->p_token); 545 546 for (;;) { 547 ret |= ktrops(td, p, ops, facs, tracenode); 548 549 /* 550 * If this process has children, descend to them next, 551 * otherwise do any siblings, and if done with this level, 552 * follow back up the tree (but not past top). 553 */ 554 if ((np = LIST_FIRST(&p->p_children)) != NULL) { 555 PHOLD(np); 556 } 557 while (np == NULL) { 558 if (p == top) 559 break; 560 if ((np = LIST_NEXT(p, p_sibling)) != NULL) { 561 PHOLD(np); 562 break; 563 } 564 565 /* 566 * recurse up to parent, set p in our inner 567 * loop when doing this. np can be NULL if 568 * we race a reparenting to init (thus 'top' 569 * is skipped past and never encountered). 570 */ 571 np = p->p_pptr; 572 if (np == NULL) 573 break; 574 PHOLD(np); 575 lwkt_reltoken(&p->p_token); 576 PRELE(p); 577 p = np; 578 lwkt_gettoken(&p->p_token); 579 np = NULL; 580 } 581 lwkt_reltoken(&p->p_token); 582 PRELE(p); 583 p = np; 584 if (p == NULL) 585 break; 586 /* Already held, but we need the token too */ 587 lwkt_gettoken(&p->p_token); 588 } 589 return (ret); 590 } 591 592 static void 593 ktrwrite(struct lwp *lp, struct ktr_header *kth, struct uio *uio) 594 { 595 struct ktrace_clear_info info; 596 struct uio auio; 597 struct iovec aiov[2]; 598 int error; 599 ktrace_node_t tracenode; 600 601 /* 602 * We have to ref our tracenode to prevent it from being ripped out 603 * from under us while we are trying to use it. p_tracenode can 604 * go away at any time if another process gets a write error. 605 * 606 * XXX not MP safe 607 */ 608 if (lp->lwp_proc->p_tracenode == NULL) 609 return; 610 tracenode = ktrinherit(lp->lwp_proc->p_tracenode); 611 auio.uio_iov = &aiov[0]; 612 auio.uio_offset = 0; 613 auio.uio_segflg = UIO_SYSSPACE; 614 auio.uio_rw = UIO_WRITE; 615 aiov[0].iov_base = (caddr_t)kth; 616 aiov[0].iov_len = sizeof(struct ktr_header); 617 auio.uio_resid = sizeof(struct ktr_header); 618 auio.uio_iovcnt = 1; 619 auio.uio_td = curthread; 620 if (kth->ktr_len > 0) { 621 auio.uio_iovcnt++; 622 aiov[1].iov_base = kth->ktr_buf; 623 aiov[1].iov_len = kth->ktr_len; 624 auio.uio_resid += kth->ktr_len; 625 if (uio != NULL) 626 kth->ktr_len += uio->uio_resid; 627 } 628 629 /* 630 * NOTE: Must set timestamp after obtaining lock to ensure no 631 * timestamp reversals in the output file. 632 */ 633 vn_lock(tracenode->kn_vp, LK_EXCLUSIVE | LK_RETRY); 634 microtime(&kth->ktr_time); 635 error = VOP_WRITE(tracenode->kn_vp, &auio, 636 IO_UNIT | IO_APPEND, lp->lwp_thread->td_ucred); 637 if (error == 0 && uio != NULL) { 638 error = VOP_WRITE(tracenode->kn_vp, uio, 639 IO_UNIT | IO_APPEND, lp->lwp_thread->td_ucred); 640 } 641 vn_unlock(tracenode->kn_vp); 642 if (error) { 643 /* 644 * If an error occured, give up tracing on all processes 645 * using this tracenode. This is not MP safe but is 646 * blocking-safe. 647 */ 648 log(LOG_NOTICE, 649 "ktrace write failed, errno %d, tracing stopped\n", error); 650 info.tracenode = tracenode; 651 info.error = 0; 652 info.rootclear = 1; 653 allproc_scan(ktrace_clear_callback, &info, 0); 654 } 655 ktrdestroy(&tracenode); 656 } 657 658 /* 659 * Return true if caller has permission to set the ktracing state 660 * of target. Essentially, the target can't possess any 661 * more permissions than the caller. KTRFAC_ROOT signifies that 662 * root previously set the tracing status on the target process, and 663 * so, only root may further change it. 664 * 665 * TODO: check groups. use caller effective gid. 666 */ 667 static int 668 ktrcanset(struct thread *calltd, struct proc *targetp) 669 { 670 struct ucred *caller = calltd->td_ucred; 671 struct ucred *target = targetp->p_ucred; 672 673 if (!PRISON_CHECK(caller, target)) 674 return (0); 675 if ((caller->cr_uid == target->cr_ruid && 676 target->cr_ruid == target->cr_svuid && 677 caller->cr_rgid == target->cr_rgid && /* XXX */ 678 target->cr_rgid == target->cr_svgid && 679 (targetp->p_traceflag & KTRFAC_ROOT) == 0 && 680 (targetp->p_flags & P_SUGID) == 0) || 681 caller->cr_uid == 0) 682 return (1); 683 684 return (0); 685 } 686 687 #endif /* KTRACE */ 688