1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 30 * $FreeBSD: src/sys/kern/kern_ktrace.c,v 1.35.2.6 2002/07/05 22:36:38 darrenr Exp $ 31 */ 32 33 #include "opt_ktrace.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/uio.h> 38 #include <sys/sysproto.h> 39 #include <sys/kernel.h> 40 #include <sys/proc.h> 41 #include <sys/fcntl.h> 42 #include <sys/lock.h> 43 #include <sys/nlookup.h> 44 #include <sys/vnode.h> 45 #include <sys/ktrace.h> 46 #include <sys/malloc.h> 47 #include <sys/syslog.h> 48 #include <sys/sysent.h> 49 50 #include <vm/vm_zone.h> 51 52 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 53 54 #ifdef KTRACE 55 static void ktrgetheader (struct ktr_header *kth, int type); 56 static struct ktr_syscall *ktrgetsyscall(struct ktr_header *kth, 57 struct ktr_syscall *ktp_cache, int narg); 58 static void ktrputsyscall(struct ktr_syscall *ktp_cache, 59 struct ktr_syscall *ktp); 60 static void ktrwrite (struct lwp *, struct ktr_header *, struct uio *); 61 static int ktrcanset (struct thread *,struct proc *); 62 static int ktrsetchildren (struct thread *, struct proc *, 63 int, int, ktrace_node_t); 64 static int ktrops (struct thread *,struct proc *,int,int, ktrace_node_t); 65 66 /* 67 * MPSAFE 68 */ 69 static 70 void 71 ktrgetheader(struct ktr_header *kth, int type) 72 { 73 thread_t td = curthread; 74 struct proc *p = td->td_proc; 75 struct lwp *lp = td->td_lwp; 76 77 kth->ktr_type = type; 78 /* XXX threaded flag is a hack at the moment */ 79 kth->ktr_flags = (p->p_nthreads > 1) ? KTRH_THREADED : 0; 80 kth->ktr_flags |= KTRH_CPUID_ENCODE(td->td_gd->gd_cpuid); 81 /*microtime(&kth->ktr_time); set in ktrwrite */ 82 kth->ktr_pid = p->p_pid; 83 kth->ktr_tid = lp->lwp_tid; 84 bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN + 1); 85 } 86 87 static 88 struct ktr_syscall * 89 ktrgetsyscall(struct ktr_header *kth, struct ktr_syscall *ktp_cache, int narg) 90 { 91 size_t len; 92 93 len = offsetof(struct ktr_syscall, ktr_args[narg]); 94 if (len > sizeof(*ktp_cache)) 95 ktp_cache = kmalloc(len, M_KTRACE, M_WAITOK); 96 kth->ktr_buf = (caddr_t)ktp_cache; 97 kth->ktr_len = (int)len; 98 return (ktp_cache); 99 } 100 101 static 102 void 103 ktrputsyscall(struct ktr_syscall *ktp_cache, struct ktr_syscall *ktp) 104 { 105 if (ktp != ktp_cache) 106 kfree(ktp, M_KTRACE); 107 } 108 109 void 110 ktrsyscall(struct lwp *lp, int code, int narg, register_t args[]) 111 { 112 struct ktr_header kth; 113 struct ktr_syscall ktp_cache; 114 struct ktr_syscall *ktp; 115 register_t *argp; 116 int i; 117 118 /* 119 * Setting the active bit prevents a ktrace recursion from the 120 * ktracing op itself. 121 */ 122 lp->lwp_traceflag |= KTRFAC_ACTIVE; 123 ktrgetheader(&kth, KTR_SYSCALL); 124 125 ktp = ktrgetsyscall(&kth, &ktp_cache, narg); 126 ktp->ktr_code = code; 127 ktp->ktr_narg = narg; 128 argp = &ktp->ktr_args[0]; 129 for (i = 0; i < narg; i++) 130 *argp++ = args[i]; 131 ktrwrite(lp, &kth, NULL); 132 133 ktrputsyscall(&ktp_cache, ktp); 134 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 135 } 136 137 void 138 ktrsysret(struct lwp *lp, int code, int error, register_t retval) 139 { 140 struct ktr_header kth; 141 struct ktr_sysret ktp; 142 143 lp->lwp_traceflag |= KTRFAC_ACTIVE; 144 ktrgetheader(&kth, KTR_SYSRET); 145 146 ktp.ktr_code = code; 147 ktp.ktr_error = error; 148 if (error == 0) 149 ktp.ktr_retval = retval; /* what about val2 ? */ 150 else 151 ktp.ktr_retval = 0; 152 153 kth.ktr_buf = (caddr_t)&ktp; 154 kth.ktr_len = (int)sizeof(struct ktr_sysret); 155 156 ktrwrite(lp, &kth, NULL); 157 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 158 } 159 160 void 161 ktrnamei(struct lwp *lp, char *path) 162 { 163 struct ktr_header kth; 164 165 lp->lwp_traceflag |= KTRFAC_ACTIVE; 166 ktrgetheader(&kth, KTR_NAMEI); 167 168 kth.ktr_len = (int)strlen(path); 169 kth.ktr_buf = path; 170 171 ktrwrite(lp, &kth, NULL); 172 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 173 } 174 175 void 176 ktrgenio(struct lwp *lp, int fd, enum uio_rw rw, struct uio *uio, int error) 177 { 178 struct ktr_header kth; 179 struct ktr_genio ktg; 180 181 if (error) 182 return; 183 lp->lwp_traceflag |= KTRFAC_ACTIVE; 184 ktrgetheader(&kth, KTR_GENIO); 185 186 ktg.ktr_fd = fd; 187 ktg.ktr_rw = rw; 188 kth.ktr_buf = (caddr_t)&ktg; 189 kth.ktr_len = (int)sizeof(struct ktr_genio); 190 uio->uio_offset = 0; 191 uio->uio_rw = UIO_WRITE; 192 193 ktrwrite(lp, &kth, uio); 194 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 195 } 196 197 void 198 ktrpsig(struct lwp *lp, int sig, sig_t action, sigset_t *mask, int code) 199 { 200 struct ktr_header kth; 201 struct ktr_psig kp; 202 203 lp->lwp_traceflag |= KTRFAC_ACTIVE; 204 ktrgetheader(&kth, KTR_PSIG); 205 206 kp.signo = (char)sig; 207 kp.action = action; 208 kp.mask = *mask; 209 kp.code = code; 210 kth.ktr_buf = (caddr_t)&kp; 211 kth.ktr_len = (int)sizeof(struct ktr_psig); 212 213 ktrwrite(lp, &kth, NULL); 214 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 215 } 216 217 void 218 ktrcsw(struct lwp *lp, int out, int user) 219 { 220 struct ktr_header kth; 221 struct ktr_csw kc; 222 223 lp->lwp_traceflag |= KTRFAC_ACTIVE; 224 ktrgetheader(&kth, KTR_CSW); 225 226 kc.out = out; 227 kc.user = user; 228 kth.ktr_buf = (caddr_t)&kc; 229 kth.ktr_len = (int)sizeof(struct ktr_csw); 230 231 ktrwrite(lp, &kth, NULL); 232 lp->lwp_traceflag &= ~KTRFAC_ACTIVE; 233 } 234 #endif 235 236 /* Interface and common routines */ 237 238 #ifdef KTRACE 239 /* 240 * ktrace system call 241 */ 242 struct ktrace_clear_info { 243 ktrace_node_t tracenode; 244 int rootclear; 245 int error; 246 }; 247 248 static int ktrace_clear_callback(struct proc *p, void *data); 249 250 #endif 251 252 /* 253 * MPALMOSTSAFE 254 */ 255 int 256 sys_ktrace(struct ktrace_args *uap) 257 { 258 #ifdef KTRACE 259 struct ktrace_clear_info info; 260 struct thread *td = curthread; 261 struct proc *curp = td->td_proc; 262 struct proc *p; 263 struct pgrp *pg; 264 int facs = uap->facs & ~KTRFAC_ROOT; 265 int ops = KTROP(uap->ops); 266 int descend = uap->ops & KTRFLAG_DESCEND; 267 int ret = 0; 268 int error = 0; 269 struct nlookupdata nd; 270 ktrace_node_t tracenode = NULL; 271 272 lwkt_gettoken(&curp->p_token); 273 curp->p_traceflag |= KTRFAC_ACTIVE; 274 275 if (ops != KTROP_CLEAR) { 276 /* 277 * an operation which requires a file argument. 278 */ 279 error = nlookup_init(&nd, uap->fname, 280 UIO_USERSPACE, NLC_LOCKVP); 281 if (error == 0) 282 error = vn_open(&nd, NULL, FREAD|FWRITE|O_NOFOLLOW, 0); 283 if (error == 0 && nd.nl_open_vp->v_type != VREG) 284 error = EACCES; 285 if (error) { 286 curp->p_traceflag &= ~KTRFAC_ACTIVE; 287 nlookup_done(&nd); 288 goto done; 289 } 290 tracenode = kmalloc(sizeof(struct ktrace_node), M_KTRACE, 291 M_WAITOK | M_ZERO); 292 tracenode->kn_vp = nd.nl_open_vp; 293 tracenode->kn_refs = 1; 294 nd.nl_open_vp = NULL; 295 nlookup_done(&nd); 296 vn_unlock(tracenode->kn_vp); 297 } 298 /* 299 * Clear all uses of the tracefile. Not the most efficient operation 300 * in the world. 301 */ 302 if (ops == KTROP_CLEARFILE) { 303 info.tracenode = tracenode; 304 info.error = 0; 305 info.rootclear = 0; 306 allproc_scan(ktrace_clear_callback, &info, 0); 307 error = info.error; 308 goto done; 309 } 310 /* 311 * need something to (un)trace (XXX - why is this here?) 312 */ 313 if (!facs) { 314 error = EINVAL; 315 goto done; 316 } 317 /* 318 * do it 319 */ 320 if (uap->pid < 0) { 321 /* 322 * By process group. Process group is referenced, preventing 323 * disposal. 324 */ 325 pg = pgfind(-uap->pid); 326 if (pg == NULL) { 327 error = ESRCH; 328 goto done; 329 } 330 lwkt_gettoken(&pg->pg_token); 331 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 332 PHOLD(p); 333 if (descend) 334 ret |= ktrsetchildren(td, p, ops, facs, tracenode); 335 else 336 ret |= ktrops(td, p, ops, facs, tracenode); 337 PRELE(p); 338 } 339 lwkt_reltoken(&pg->pg_token); 340 pgrel(pg); 341 } else { 342 /* 343 * by pid 344 */ 345 p = pfind(uap->pid); 346 if (p == NULL) { 347 error = ESRCH; 348 goto done; 349 } 350 if (descend) 351 ret |= ktrsetchildren(td, p, ops, facs, tracenode); 352 else 353 ret |= ktrops(td, p, ops, facs, tracenode); 354 PRELE(p); 355 } 356 if (!ret) 357 error = EPERM; 358 done: 359 if (tracenode) 360 ktrdestroy(&tracenode); 361 curp->p_traceflag &= ~KTRFAC_ACTIVE; 362 lwkt_reltoken(&curp->p_token); 363 return (error); 364 #else 365 return ENOSYS; 366 #endif 367 } 368 369 #ifdef KTRACE 370 371 /* 372 * NOTE: NOT MPSAFE (yet) 373 */ 374 static int 375 ktrace_clear_callback(struct proc *p, void *data) 376 { 377 struct ktrace_clear_info *info = data; 378 379 if (p->p_tracenode) { 380 if (info->rootclear) { 381 if (p->p_tracenode == info->tracenode) { 382 ktrdestroy(&p->p_tracenode); 383 p->p_traceflag = 0; 384 } 385 } else { 386 if (p->p_tracenode->kn_vp == info->tracenode->kn_vp) { 387 if (ktrcanset(curthread, p)) { 388 ktrdestroy(&p->p_tracenode); 389 p->p_traceflag = 0; 390 } else { 391 info->error = EPERM; 392 } 393 } 394 } 395 } 396 return(0); 397 } 398 399 #endif 400 401 /* 402 * utrace system call 403 * 404 * MPALMOSTSAFE 405 */ 406 int 407 sys_utrace(struct utrace_args *uap) 408 { 409 #ifdef KTRACE 410 struct ktr_header kth; 411 struct thread *td = curthread; /* XXX */ 412 char cp_cache[64]; 413 caddr_t cp; 414 415 if (!KTRPOINT(td, KTR_USER)) 416 return (0); 417 if (uap->len > KTR_USER_MAXLEN) 418 return (EINVAL); 419 td->td_lwp->lwp_traceflag |= KTRFAC_ACTIVE; 420 ktrgetheader(&kth, KTR_USER); 421 if (uap->len <= sizeof(cp_cache)) 422 cp = cp_cache; 423 else 424 cp = kmalloc(uap->len, M_KTRACE, M_WAITOK); 425 426 if (!copyin(uap->addr, cp, uap->len)) { 427 kth.ktr_buf = cp; 428 kth.ktr_len = uap->len; 429 ktrwrite(td->td_lwp, &kth, NULL); 430 } 431 if (cp != cp_cache) 432 kfree(cp, M_KTRACE); 433 td->td_lwp->lwp_traceflag &= ~KTRFAC_ACTIVE; 434 435 return (0); 436 #else 437 return (ENOSYS); 438 #endif 439 } 440 441 void 442 ktrdestroy(struct ktrace_node **tracenodep) 443 { 444 ktrace_node_t tracenode; 445 446 if ((tracenode = *tracenodep) != NULL) { 447 *tracenodep = NULL; 448 KKASSERT(tracenode->kn_refs > 0); 449 if (atomic_fetchadd_int(&tracenode->kn_refs, -1) == 1) { 450 vn_close(tracenode->kn_vp, FREAD|FWRITE, NULL); 451 tracenode->kn_vp = NULL; 452 kfree(tracenode, M_KTRACE); 453 } 454 } 455 } 456 457 /* 458 * This allows a process to inherit a ref on a tracenode and is also used 459 * as a temporary ref to prevent a tracenode from being destroyed out from 460 * under an active operation. 461 */ 462 ktrace_node_t 463 ktrinherit(ktrace_node_t tracenode) 464 { 465 if (tracenode) { 466 KKASSERT(tracenode->kn_refs > 0); 467 atomic_add_int(&tracenode->kn_refs, 1); 468 } 469 return(tracenode); 470 } 471 472 #ifdef KTRACE 473 static int 474 ktrops(struct thread *td, struct proc *p, int ops, int facs, 475 ktrace_node_t tracenode) 476 { 477 ktrace_node_t oldnode; 478 479 if (!ktrcanset(td, p)) 480 return (0); 481 if (ops == KTROP_SET) { 482 if ((oldnode = p->p_tracenode) != tracenode) { 483 p->p_tracenode = ktrinherit(tracenode); 484 ktrdestroy(&oldnode); 485 } 486 p->p_traceflag |= facs; 487 if (td->td_ucred->cr_uid == 0) 488 p->p_traceflag |= KTRFAC_ROOT; 489 } else { 490 /* KTROP_CLEAR */ 491 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 492 /* no more tracing */ 493 p->p_traceflag = 0; 494 ktrdestroy(&p->p_tracenode); 495 } 496 } 497 498 return (1); 499 } 500 501 static int 502 ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs, 503 ktrace_node_t tracenode) 504 { 505 struct proc *p; 506 struct proc *np; 507 int ret = 0; 508 509 p = top; 510 PHOLD(p); 511 lwkt_gettoken(&p->p_token); 512 513 for (;;) { 514 ret |= ktrops(td, p, ops, facs, tracenode); 515 516 /* 517 * If this process has children, descend to them next, 518 * otherwise do any siblings, and if done with this level, 519 * follow back up the tree (but not past top). 520 */ 521 if ((np = LIST_FIRST(&p->p_children)) != NULL) { 522 PHOLD(np); 523 } 524 while (np == NULL) { 525 if (p == top) 526 break; 527 if ((np = LIST_NEXT(p, p_sibling)) != NULL) { 528 PHOLD(np); 529 break; 530 } 531 532 /* 533 * recurse up to parent, set p in our inner 534 * loop when doing this. np can be NULL if 535 * we race a reparenting to init (thus 'top' 536 * is skipped past and never encountered). 537 */ 538 np = p->p_pptr; 539 if (np == NULL) 540 break; 541 PHOLD(np); 542 lwkt_reltoken(&p->p_token); 543 PRELE(p); 544 p = np; 545 lwkt_gettoken(&p->p_token); 546 np = NULL; 547 } 548 lwkt_reltoken(&p->p_token); 549 PRELE(p); 550 p = np; 551 if (p == NULL) 552 break; 553 /* Already held, but we need the token too */ 554 lwkt_gettoken(&p->p_token); 555 } 556 return (ret); 557 } 558 559 static void 560 ktrwrite(struct lwp *lp, struct ktr_header *kth, struct uio *uio) 561 { 562 struct ktrace_clear_info info; 563 struct uio auio; 564 struct iovec aiov[2]; 565 int error; 566 ktrace_node_t tracenode; 567 568 /* 569 * We have to ref our tracenode to prevent it from being ripped out 570 * from under us while we are trying to use it. p_tracenode can 571 * go away at any time if another process gets a write error. 572 * 573 * XXX not MP safe 574 */ 575 if (lp->lwp_proc->p_tracenode == NULL) 576 return; 577 tracenode = ktrinherit(lp->lwp_proc->p_tracenode); 578 auio.uio_iov = &aiov[0]; 579 auio.uio_offset = 0; 580 auio.uio_segflg = UIO_SYSSPACE; 581 auio.uio_rw = UIO_WRITE; 582 aiov[0].iov_base = (caddr_t)kth; 583 aiov[0].iov_len = sizeof(struct ktr_header); 584 auio.uio_resid = sizeof(struct ktr_header); 585 auio.uio_iovcnt = 1; 586 auio.uio_td = curthread; 587 if (kth->ktr_len > 0) { 588 auio.uio_iovcnt++; 589 aiov[1].iov_base = kth->ktr_buf; 590 aiov[1].iov_len = kth->ktr_len; 591 auio.uio_resid += kth->ktr_len; 592 if (uio != NULL) 593 kth->ktr_len += uio->uio_resid; 594 } 595 596 /* 597 * NOTE: Must set timestamp after obtaining lock to ensure no 598 * timestamp reversals in the output file. 599 */ 600 vn_lock(tracenode->kn_vp, LK_EXCLUSIVE | LK_RETRY); 601 microtime(&kth->ktr_time); 602 error = VOP_WRITE(tracenode->kn_vp, &auio, 603 IO_UNIT | IO_APPEND, lp->lwp_thread->td_ucred); 604 if (error == 0 && uio != NULL) { 605 error = VOP_WRITE(tracenode->kn_vp, uio, 606 IO_UNIT | IO_APPEND, lp->lwp_thread->td_ucred); 607 } 608 vn_unlock(tracenode->kn_vp); 609 if (error) { 610 /* 611 * If an error occured, give up tracing on all processes 612 * using this tracenode. This is not MP safe but is 613 * blocking-safe. 614 */ 615 log(LOG_NOTICE, 616 "ktrace write failed, errno %d, tracing stopped\n", error); 617 info.tracenode = tracenode; 618 info.error = 0; 619 info.rootclear = 1; 620 allproc_scan(ktrace_clear_callback, &info, 0); 621 } 622 ktrdestroy(&tracenode); 623 } 624 625 /* 626 * Return true if caller has permission to set the ktracing state 627 * of target. Essentially, the target can't possess any 628 * more permissions than the caller. KTRFAC_ROOT signifies that 629 * root previously set the tracing status on the target process, and 630 * so, only root may further change it. 631 * 632 * TODO: check groups. use caller effective gid. 633 */ 634 static int 635 ktrcanset(struct thread *calltd, struct proc *targetp) 636 { 637 struct ucred *caller = calltd->td_ucred; 638 struct ucred *target = targetp->p_ucred; 639 640 if (!PRISON_CHECK(caller, target)) 641 return (0); 642 if ((caller->cr_uid == target->cr_ruid && 643 target->cr_ruid == target->cr_svuid && 644 caller->cr_rgid == target->cr_rgid && /* XXX */ 645 target->cr_rgid == target->cr_svgid && 646 (targetp->p_traceflag & KTRFAC_ROOT) == 0 && 647 (targetp->p_flags & P_SUGID) == 0) || 648 caller->cr_uid == 0) 649 return (1); 650 651 return (0); 652 } 653 654 #endif /* KTRACE */ 655