/*	$OpenBSD: kern_ktrace.c,v 1.88 2016/03/19 12:04:15 natano Exp $	*/
/*	$NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/file.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/lock.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/pledge.h>

#include <sys/mount.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

void	ktrinitheaderraw(struct ktr_header *, uint, pid_t, pid_t);
void	ktrinitheader(struct ktr_header *, struct proc *, int);
void	ktrstart(struct proc *, struct vnode *, struct ucred *);
int	ktrops(struct proc *, struct process *, int, int, struct vnode *,
	    struct ucred *);
int	ktrsetchildren(struct proc *, struct process *, int, int,
	    struct vnode *, struct ucred *);
int	ktrwrite(struct proc *, struct ktr_header *, const void *, size_t);
int	ktrwrite2(struct proc *, struct ktr_header *, const void *, size_t,
	    const void *, size_t);
int	ktrwriteraw(struct proc *, struct vnode *, struct ucred *,
	    struct ktr_header *, struct iovec *);
int	ktrcanset(struct proc *, struct process *);

/*
 * Clear the trace settings in a correct way (to avoid races).
 */
void
ktrcleartrace(struct process *pr)
{
	struct vnode *vp;
	struct ucred *cred;

	if (pr->ps_tracevp != NULL) {
		vp = pr->ps_tracevp;
		cred = pr->ps_tracecred;

		pr->ps_traceflag = 0;
		pr->ps_tracevp = NULL;
		pr->ps_tracecred = NULL;

		vrele(vp);
		crfree(cred);
	}
}
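/*
 * ktrcleartrace() above and ktrsettrace() below keep the reference
 * counting consistent: a process that is being traced holds one vnode
 * reference and one ucred reference through ps_tracevp/ps_tracecred,
 * and in both routines the pointers are updated before any old
 * references are dropped.
 */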
/*
 * Change the trace setting in a correct way (to avoid races).
 */
void
ktrsettrace(struct process *pr, int facs, struct vnode *newvp,
    struct ucred *newcred)
{
	struct vnode *oldvp;
	struct ucred *oldcred;

	KASSERT(newvp != NULL);
	KASSERT(newcred != NULL);

	pr->ps_traceflag |= facs;

	/* nothing to change about where the trace goes? */
	if (pr->ps_tracevp == newvp && pr->ps_tracecred == newcred)
		return;

	vref(newvp);
	crhold(newcred);

	oldvp = pr->ps_tracevp;
	oldcred = pr->ps_tracecred;

	pr->ps_tracevp = newvp;
	pr->ps_tracecred = newcred;

	if (oldvp != NULL) {
		vrele(oldvp);
		crfree(oldcred);
	}
}

void
ktrinitheaderraw(struct ktr_header *kth, uint type, pid_t pid, pid_t tid)
{
	memset(kth, 0, sizeof(struct ktr_header));
	kth->ktr_type = type;
	nanotime(&kth->ktr_time);
	kth->ktr_pid = pid;
	kth->ktr_tid = tid;
}

void
ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
{
	ktrinitheaderraw(kth, type, p->p_p->ps_pid,
	    p->p_pid + THREAD_PID_OFFSET);
	memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN);
}

void
ktrstart(struct proc *p, struct vnode *vp, struct ucred *cred)
{
	struct ktr_header kth;

	ktrinitheaderraw(&kth, htobe32(KTR_START), -1, -1);
	ktrwriteraw(p, vp, cred, &kth, NULL);
}

void
ktrsyscall(struct proc *p, register_t code, size_t argsize, register_t args[])
{
	struct ktr_header kth;
	struct ktr_syscall *ktp;
	size_t len = sizeof(struct ktr_syscall) + argsize;
	register_t *argp;
	u_int nargs = 0;
	int i;

	if (code == SYS_sysctl) {
		/*
		 * The sysctl encoding stores the mib[]
		 * array because it is interesting.
		 */
		if (args[1] > 0)
			nargs = lmin(args[1], CTL_MAXNAME);
		len += nargs * sizeof(int);
	}
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSCALL);
	ktp = malloc(len, M_TEMP, M_WAITOK);
	ktp->ktr_code = code;
	ktp->ktr_argsize = argsize;
	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
	for (i = 0; i < (argsize / sizeof *argp); i++)
		*argp++ = args[i];
	if (nargs && copyin((void *)args[0], argp, nargs * sizeof(int)))
		memset(argp, 0, nargs * sizeof(int));
	ktrwrite(p, &kth, ktp, len);
	free(ktp, M_TEMP, len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrsysret(struct proc *p, register_t code, int error,
    const register_t retval[2])
{
	struct ktr_header kth;
	struct ktr_sysret ktp;
	int len;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSRET);
	ktp.ktr_code = code;
	ktp.ktr_error = error;
	if (error)
		len = 0;
	else if (code == SYS_lseek)
		/* the one exception: lseek on ILP32 needs more */
		len = sizeof(long long);
	else
		len = sizeof(register_t);
	ktrwrite2(p, &kth, &ktp, sizeof(ktp), retval, len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrnamei(struct proc *p, char *path)
{
	struct ktr_header kth;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_NAMEI);
	ktrwrite(p, &kth, path, strlen(path));
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}
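/*
 * ktrgenio() below records the data moved by a read or write on fd.
 * The payload is copied in from userland and written out in chunks of
 * at most PAGE_SIZE bytes, yielding the CPU between chunks when the
 * scheduler asks for it, and stopping early if a copyin or a trace
 * write fails.
 */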
void
ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov,
    ssize_t len)
{
	struct ktr_header kth;
	struct ktr_genio ktp;
	caddr_t cp;
	int count;
	int buflen;

	atomic_setbits_int(&p->p_flag, P_INKTR);

	/* beware overflow */
	if (len > PAGE_SIZE)
		buflen = PAGE_SIZE;
	else
		buflen = len + sizeof(struct ktr_genio);

	ktrinitheader(&kth, p, KTR_GENIO);
	ktp.ktr_fd = fd;
	ktp.ktr_rw = rw;

	cp = malloc(buflen, M_TEMP, M_WAITOK);
	while (len > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD)
			preempt(NULL);

		count = lmin(iov->iov_len, buflen);
		if (count > len)
			count = len;
		if (copyin(iov->iov_base, cp, count))
			break;

		if (ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count) != 0)
			break;

		iov->iov_len -= count;
		iov->iov_base = (caddr_t)iov->iov_base + count;

		if (iov->iov_len == 0)
			iov++;

		len -= count;
	}

	free(cp, M_TEMP, buflen);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrpsig(struct proc *p, int sig, sig_t action, int mask, int code,
    siginfo_t *si)
{
	struct ktr_header kth;
	struct ktr_psig kp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_PSIG);
	kp.signo = (char)sig;
	kp.action = action;
	kp.mask = mask;
	kp.code = code;
	kp.si = *si;

	ktrwrite(p, &kth, &kp, sizeof(kp));
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrstruct(struct proc *p, const char *name, const void *data, size_t datalen)
{
	struct ktr_header kth;

	KERNEL_ASSERT_LOCKED();
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_STRUCT);

	if (data == NULL)
		datalen = 0;
	ktrwrite2(p, &kth, name, strlen(name) + 1, data, datalen);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

int
ktruser(struct proc *p, const char *id, const void *addr, size_t len)
{
	struct ktr_header kth;
	struct ktr_user ktp;
	int error;
	void *memp;
#define	STK_PARAMS	128
	long long stkbuf[STK_PARAMS / sizeof(long long)];

	if (!KTRPOINT(p, KTR_USER))
		return (0);
	if (len > KTR_USER_MAXLEN)
		return (EINVAL);

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_USER);
	memset(ktp.ktr_id, 0, KTR_USER_MAXIDLEN);
	error = copyinstr(id, ktp.ktr_id, KTR_USER_MAXIDLEN, NULL);
	if (error == 0) {
		if (len > sizeof(stkbuf))
			memp = malloc(len, M_TEMP, M_WAITOK);
		else
			memp = stkbuf;
		error = copyin(addr, memp, len);
		if (error == 0)
			ktrwrite2(p, &kth, &ktp, sizeof(ktp), memp, len);
		if (memp != stkbuf)
			free(memp, M_TEMP, len);
	}
	atomic_clearbits_int(&p->p_flag, P_INKTR);
	return (error);
}
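/*
 * ktruser() above is reached from userland through the utrace(2)
 * system call, so a traced program can inject its own records into
 * the ktrace stream with something like
 *
 *	utrace("malloc", &stats, sizeof(stats));
 *
 * where the label must fit in KTR_USER_MAXIDLEN bytes and the payload
 * is limited to KTR_USER_MAXLEN bytes.
 */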
void
ktrexec(struct proc *p, int type, const char *data, ssize_t len)
{
	struct ktr_header kth;
	int count;
	int buflen;

	assert(type == KTR_EXECARGS || type == KTR_EXECENV);
	atomic_setbits_int(&p->p_flag, P_INKTR);

	/* beware overflow */
	if (len > PAGE_SIZE)
		buflen = PAGE_SIZE;
	else
		buflen = len;

	ktrinitheader(&kth, p, type);

	while (len > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD)
			preempt(NULL);

		count = lmin(len, buflen);
		if (ktrwrite(p, &kth, data, count) != 0)
			break;

		len -= count;
		data += count;
	}

	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrpledge(struct proc *p, int error, uint64_t code, int syscall)
{
	struct ktr_header kth;
	struct ktr_pledge kp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_PLEDGE);
	kp.error = error;
	kp.code = code;
	kp.syscall = syscall;

	ktrwrite(p, &kth, &kp, sizeof(kp));
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}
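/*
 * The emitters above are normally reached through KTRPOINT() checks
 * at the call sites, so no record is built unless the corresponding
 * facility bit is set in ps_traceflag, e.g. roughly
 *
 *	#ifdef KTRACE
 *		if (KTRPOINT(p, KTR_NAMEI))
 *			ktrnamei(p, cnp->cn_pnbuf);
 *	#endif
 */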
/* Interface and common routines */

/*
 * ktrace system call
 */
int
sys_ktrace(struct proc *p, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(pid_t) pid;
	} */ *uap = v;
	struct vnode *vp = NULL;
	struct process *pr = NULL;
	struct ucred *cred = NULL;
	struct pgrp *pg;
	int facs = SCARG(uap, facs) & ~((unsigned) KTRFAC_ROOT);
	int ops = KTROP(SCARG(uap, ops));
	int descend = SCARG(uap, ops) & KTRFLAG_DESCEND;
	int ret = 0;
	int error = 0;
	struct nameidata nd;

	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		cred = p->p_ucred;
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
		    p);
		nd.ni_pledge = PLEDGE_CPATH | PLEDGE_WPATH;
		if ((error = vn_open(&nd, FWRITE|O_NOFOLLOW, 0)) != 0)
			goto done;
		vp = nd.ni_vp;

		VOP_UNLOCK(vp, p);
		if (vp->v_type != VREG) {
			error = EACCES;
			goto done;
		}
	}
	/*
	 * Clear all uses of the tracefile
	 */
	if (ops == KTROP_CLEARFILE) {
		LIST_FOREACH(pr, &allprocess, ps_list) {
			if (pr->ps_tracevp == vp) {
				if (ktrcanset(p, pr))
					ktrcleartrace(pr);
				else
					error = EPERM;
			}
		}
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	if (ops == KTROP_SET) {
		if (suser(p, 0) == 0)
			facs |= KTRFAC_ROOT;
		ktrstart(p, vp, cred);
	}
	/*
	 * do it
	 */
	if (SCARG(uap, pid) < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-SCARG(uap, pid));
		if (pg == NULL) {
			error = ESRCH;
			goto done;
		}
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			if (descend)
				ret |= ktrsetchildren(p, pr, ops, facs, vp,
				    cred);
			else
				ret |= ktrops(p, pr, ops, facs, vp, cred);
		}
	} else {
		/*
		 * by pid
		 */
		pr = prfind(SCARG(uap, pid));
		if (pr == NULL) {
			error = ESRCH;
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(p, pr, ops, facs, vp, cred);
		else
			ret |= ktrops(p, pr, ops, facs, vp, cred);
	}
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, cred, p);
	return (error);
}

int
ktrops(struct proc *curp, struct process *pr, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	if (!ktrcanset(curp, pr))
		return (0);
	if (ops == KTROP_SET)
		ktrsettrace(pr, facs, vp, cred);
	else {
		/* KTROP_CLEAR */
		pr->ps_traceflag &= ~facs;
		if ((pr->ps_traceflag & KTRFAC_MASK) == 0) {
			/* cleared all the facility bits, so stop completely */
			ktrcleartrace(pr);
		}
	}

	return (1);
}
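/*
 * ktrsetchildren() below applies ktrops() to `top' and to every
 * process descended from it.  The walk is iterative: follow
 * ps_children links down as far as possible, then move across
 * ps_sibling links, and climb back up through ps_pptr, never going
 * above `top'.
 */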
int
ktrsetchildren(struct proc *curp, struct process *top, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	struct process *pr;
	int ret = 0;

	pr = top;
	for (;;) {
		ret |= ktrops(curp, pr, ops, facs, vp, cred);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&pr->ps_children))
			pr = LIST_FIRST(&pr->ps_children);
		else for (;;) {
			if (pr == top)
				return (ret);
			if (LIST_NEXT(pr, ps_sibling) != NULL) {
				pr = LIST_NEXT(pr, ps_sibling);
				break;
			}
			pr = pr->ps_pptr;
		}
	}
	/*NOTREACHED*/
}

int
ktrwrite(struct proc *p, struct ktr_header *kth, const void *aux, size_t len)
{
	struct vnode *vp = p->p_p->ps_tracevp;
	struct ucred *cred = p->p_p->ps_tracecred;
	struct iovec data[2];
	int error;

	if (vp == NULL)
		return 0;
	crhold(cred);
	data[0].iov_base = (void *)aux;
	data[0].iov_len = len;
	data[1].iov_len = 0;
	kth->ktr_len = len;
	error = ktrwriteraw(p, vp, cred, kth, data);
	crfree(cred);
	return (error);
}

int
ktrwrite2(struct proc *p, struct ktr_header *kth, const void *aux1,
    size_t len1, const void *aux2, size_t len2)
{
	struct vnode *vp = p->p_p->ps_tracevp;
	struct ucred *cred = p->p_p->ps_tracecred;
	struct iovec data[2];
	int error;

	if (vp == NULL)
		return 0;
	crhold(cred);
	data[0].iov_base = (void *)aux1;
	data[0].iov_len = len1;
	data[1].iov_base = (void *)aux2;
	data[1].iov_len = len2;
	kth->ktr_len = len1 + len2;
	error = ktrwriteraw(p, vp, cred, kth, data);
	crfree(cred);
	return (error);
}

int
ktrwriteraw(struct proc *curp, struct vnode *vp, struct ucred *cred,
    struct ktr_header *kth, struct iovec *data)
{
	struct uio auio;
	struct iovec aiov[3];
	struct process *pr;
	int error;

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_procp = curp;
	if (kth->ktr_len > 0) {
		aiov[1] = data[0];
		aiov[2] = data[1];
		auio.uio_iovcnt++;
		if (aiov[2].iov_len > 0)
			auio.uio_iovcnt++;
		auio.uio_resid += kth->ktr_len;
	}
	vget(vp, LK_EXCLUSIVE | LK_RETRY, curp);
	error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, cred);
	if (!error) {
		vput(vp);
		return (0);
	}
	/*
	 * If error encountered, give up tracing on this vnode.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	LIST_FOREACH(pr, &allprocess, ps_list)
		if (pr->ps_tracevp == vp && pr->ps_tracecred == cred)
			ktrcleartrace(pr);

	vput(vp);
	return (error);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 *
 * TODO: check groups.  use caller effective gid.
 */
int
ktrcanset(struct proc *callp, struct process *targetpr)
{
	struct ucred *caller = callp->p_ucred;
	struct ucred *target = targetpr->ps_ucred;

	if ((caller->cr_uid == target->cr_ruid &&
	    target->cr_ruid == target->cr_svuid &&
	    caller->cr_rgid == target->cr_rgid &&	/* XXX */
	    target->cr_rgid == target->cr_svgid &&
	    (targetpr->ps_traceflag & KTRFAC_ROOT) == 0 &&
	    !ISSET(targetpr->ps_flags, PS_SUGID)) ||
	    caller->cr_uid == 0)
		return (1);

	return (0);
}