1 /* 2 * Copyright (c) 1993, 1995 Jan-Simon Pendry 3 * Copyright (c) 1993, 1995 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 34 * 35 * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $ 36 */ 37 38 /* 39 * procfs vnode interface 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/time.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/fcntl.h> 48 #include <sys/proc.h> 49 #include <sys/caps.h> 50 #include <sys/signalvar.h> 51 #include <sys/vnode.h> 52 #include <sys/uio.h> 53 #include <sys/mount.h> 54 #include <sys/namei.h> 55 #include <sys/dirent.h> 56 #include <sys/malloc.h> 57 #include <sys/reg.h> 58 #include <vm/vm_zone.h> 59 #include <vfs/procfs/procfs.h> 60 #include <sys/pioctl.h> 61 62 #include <sys/spinlock2.h> 63 64 #include <machine/limits.h> 65 66 static int procfs_access (struct vop_access_args *); 67 static int procfs_badop (struct vop_generic_args *); 68 static int procfs_bmap (struct vop_bmap_args *); 69 static int procfs_close (struct vop_close_args *); 70 static int procfs_getattr (struct vop_getattr_args *); 71 static int procfs_inactive (struct vop_inactive_args *); 72 static int procfs_ioctl (struct vop_ioctl_args *); 73 static int procfs_lookup (struct vop_old_lookup_args *); 74 static int procfs_open (struct vop_open_args *); 75 static int procfs_print (struct vop_print_args *); 76 static int procfs_readdir (struct vop_readdir_args *); 77 static int procfs_readlink (struct vop_readlink_args *); 78 static int procfs_reclaim (struct vop_reclaim_args *); 79 static int procfs_setattr (struct vop_setattr_args *); 80 81 static int procfs_readdir_proc(struct vop_readdir_args *); 82 static int procfs_readdir_root(struct vop_readdir_args *); 83 84 /* 85 * procfs vnode operations. 86 */ 87 struct vop_ops procfs_vnode_vops = { 88 .vop_default = vop_defaultop, 89 .vop_access = procfs_access, 90 .vop_advlock = (void *)procfs_badop, 91 .vop_bmap = procfs_bmap, 92 .vop_close = procfs_close, 93 .vop_old_create = (void *)procfs_badop, 94 .vop_getattr = procfs_getattr, 95 .vop_inactive = procfs_inactive, 96 .vop_old_link = (void *)procfs_badop, 97 .vop_old_lookup = procfs_lookup, 98 .vop_old_mkdir = (void *)procfs_badop, 99 .vop_old_mknod = (void *)procfs_badop, 100 .vop_open = procfs_open, 101 .vop_pathconf = vop_stdpathconf, 102 .vop_print = procfs_print, 103 .vop_read = procfs_rw, 104 .vop_readdir = procfs_readdir, 105 .vop_readlink = procfs_readlink, 106 .vop_reclaim = procfs_reclaim, 107 .vop_old_remove = (void *)procfs_badop, 108 .vop_old_rename = (void *)procfs_badop, 109 .vop_old_rmdir = (void *)procfs_badop, 110 .vop_setattr = procfs_setattr, 111 .vop_old_symlink = (void *)procfs_badop, 112 .vop_write = (void *)procfs_rw, 113 .vop_ioctl = procfs_ioctl 114 }; 115 116 117 /* 118 * This is a list of the valid names in the 119 * process-specific sub-directories. It is 120 * used in procfs_lookup and procfs_readdir 121 */ 122 static struct proc_target { 123 u_char pt_type; 124 u_char pt_namlen; 125 char *pt_name; 126 pfstype pt_pfstype; 127 int (*pt_valid) (struct lwp *p); 128 } proc_targets[] = { 129 #define N(s) sizeof(s)-1, s 130 /* name type validp */ 131 { DT_DIR, N("."), Pproc, NULL }, 132 { DT_DIR, N(".."), Proot, NULL }, 133 { DT_REG, N("mem"), Pmem, NULL }, 134 { DT_REG, N("regs"), Pregs, procfs_validregs }, 135 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 136 { DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs }, 137 { DT_REG, N("ctl"), Pctl, NULL }, 138 { DT_REG, N("status"), Pstatus, NULL }, 139 { DT_REG, N("note"), Pnote, NULL }, 140 { DT_REG, N("notepg"), Pnotepg, NULL }, 141 { DT_REG, N("map"), Pmap, procfs_validmap }, 142 { DT_REG, N("etype"), Ptype, procfs_validtype }, 143 { DT_REG, N("cmdline"), Pcmdline, NULL }, 144 { DT_REG, N("rlimit"), Prlimit, NULL }, 145 { DT_LNK, N("file"), Pfile, NULL }, 146 #undef N 147 }; 148 static const int nproc_targets = NELEM(proc_targets); 149 150 static pid_t atopid (const char *, u_int); 151 152 /* 153 * set things up for doing i/o on 154 * the pfsnode (vp). (vp) is locked 155 * on entry, and should be left locked 156 * on exit. 157 * 158 * for procfs we don't need to do anything 159 * in particular for i/o. all that is done 160 * is to support exclusive open on process 161 * memory images. 162 * 163 * procfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred, 164 * struct file *a_fp) 165 */ 166 static int 167 procfs_open(struct vop_open_args *ap) 168 { 169 struct pfsnode *pfs = VTOPFS(ap->a_vp); 170 struct proc *p1, *p2; 171 int error; 172 173 p2 = pfs_pfind(pfs->pfs_pid); 174 if (p2 == NULL) 175 return (ENOENT); 176 if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred)) { 177 error = ENOENT; 178 goto done; 179 } 180 181 switch (pfs->pfs_type) { 182 case Pmem: 183 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 184 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) { 185 error = EBUSY; 186 goto done; 187 } 188 189 p1 = curproc; 190 KKASSERT(p1); 191 /* Can't trace a process that's currently exec'ing. */ 192 if ((p2->p_flags & P_INEXEC) != 0) { 193 error = EAGAIN; 194 goto done; 195 } 196 if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred)) { 197 error = EPERM; 198 goto done; 199 } 200 201 if (ap->a_mode & FWRITE) 202 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 203 204 break; 205 206 default: 207 break; 208 } 209 error = vop_stdopen(ap); 210 done: 211 pfs_pdone(p2); 212 return error; 213 } 214 215 /* 216 * close the pfsnode (vp) after doing i/o. 217 * (vp) is not locked on entry or exit. 218 * 219 * nothing to do for procfs other than undo 220 * any exclusive open flag (see _open above). 221 * 222 * procfs_close(struct vnode *a_vp, int a_fflag, struct ucred *a_cred) 223 */ 224 static int 225 procfs_close(struct vop_close_args *ap) 226 { 227 struct pfsnode *pfs = VTOPFS(ap->a_vp); 228 struct proc *p; 229 230 /* 231 * Make sure the lock is exclusive for opencount tests 232 */ 233 vn_lock(ap->a_vp, LK_UPGRADE | LK_RETRY); 234 235 switch (pfs->pfs_type) { 236 case Pmem: 237 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 238 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 239 /* 240 * v_opencount determines the last real close on the vnode. 241 * 242 * If this is the last close, then it checks to see if 243 * the target process has PF_LINGER set in p_pfsflags, 244 * if this is *not* the case, then the process' stop flags 245 * are cleared, and the process is woken up. This is 246 * to help prevent the case where a process has been 247 * told to stop on an event, but then the requesting process 248 * has gone away or forgotten about it. 249 */ 250 p = NULL; 251 if ((ap->a_vp->v_opencount < 2) 252 && ((p = pfs_pfind(pfs->pfs_pid)) != NULL || 253 (p = pfs_zpfind(pfs->pfs_pid)) != NULL) 254 && !(p->p_pfsflags & PF_LINGER)) { 255 spin_lock(&p->p_spin); 256 p->p_stops = 0; 257 p->p_step = 0; 258 spin_unlock(&p->p_spin); 259 wakeup(&p->p_stype); 260 wakeup(&p->p_step); 261 } 262 pfs_pdone(p); 263 break; 264 default: 265 break; 266 } 267 268 return (vop_stdclose(ap)); 269 } 270 271 /* 272 * do an ioctl operation on a pfsnode (vp). 273 * (vp) is not locked on entry or exit. 274 */ 275 static int 276 procfs_ioctl(struct vop_ioctl_args *ap) 277 { 278 struct pfsnode *pfs = VTOPFS(ap->a_vp); 279 struct proc *procp; 280 struct proc *p; 281 int error; 282 int signo; 283 struct procfs_status *psp; 284 unsigned char flags; 285 286 procp = pfs_pfind(pfs->pfs_pid); 287 if (procp == NULL) 288 return ENOTTY; 289 p = curproc; 290 if (p == NULL) { 291 error = EINVAL; 292 goto done; 293 } 294 295 /* Can't trace a process that's currently exec'ing. */ 296 if ((procp->p_flags & P_INEXEC) != 0) { 297 error = EAGAIN; 298 goto done; 299 } 300 if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred)) { 301 error = EPERM; 302 goto done; 303 } 304 305 switch (ap->a_command) { 306 case PIOCBIS: 307 spin_lock(&procp->p_spin); 308 procp->p_stops |= *(unsigned int*)ap->a_data; 309 spin_unlock(&procp->p_spin); 310 break; 311 case PIOCBIC: 312 spin_lock(&procp->p_spin); 313 procp->p_stops &= ~*(unsigned int*)ap->a_data; 314 spin_unlock(&procp->p_spin); 315 break; 316 case PIOCSFL: 317 /* 318 * NFLAGS is "non-suser_xxx flags" -- currently, only 319 * PFS_ISUGID ("ignore set u/g id"); 320 */ 321 #define NFLAGS (PF_ISUGID) 322 flags = (unsigned char)*(unsigned int*)ap->a_data; 323 if (flags & NFLAGS && 324 (error = caps_priv_check(ap->a_cred, SYSCAP_RESTRICTEDROOT))) 325 { 326 goto done; 327 } 328 procp->p_pfsflags = flags; 329 break; 330 case PIOCGFL: 331 *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags; 332 break; 333 case PIOCSTATUS: 334 /* 335 * NOTE: syscall entry deals with stopevents and may run without 336 * the MP lock. 337 */ 338 psp = (struct procfs_status *)ap->a_data; 339 psp->flags = procp->p_pfsflags; 340 psp->events = procp->p_stops; 341 spin_lock(&procp->p_spin); 342 if (procp->p_step) { 343 psp->state = 0; 344 psp->why = procp->p_stype; 345 psp->val = procp->p_xstat; 346 spin_unlock(&procp->p_spin); 347 } else { 348 psp->state = 1; 349 spin_unlock(&procp->p_spin); 350 psp->why = 0; /* Not defined values */ 351 psp->val = 0; /* Not defined values */ 352 } 353 break; 354 case PIOCWAIT: 355 /* 356 * NOTE: syscall entry deals with stopevents and may run without 357 * the MP lock. 358 */ 359 psp = (struct procfs_status *)ap->a_data; 360 spin_lock(&procp->p_spin); 361 while (procp->p_step == 0) { 362 tsleep_interlock(&procp->p_stype, PCATCH); 363 spin_unlock(&procp->p_spin); 364 if (procp->p_stops == 0) { 365 error = 0; 366 goto done; 367 } 368 if (procp->p_flags & P_POSTEXIT) { 369 error = EINVAL; 370 goto done; 371 } 372 if (procp->p_flags & P_INEXEC) { 373 error = EAGAIN; 374 goto done; 375 } 376 error = tsleep(&procp->p_stype, PCATCH | PINTERLOCKED, 377 "piocwait", 0); 378 if (error) 379 goto done; 380 spin_lock(&procp->p_spin); 381 } 382 spin_unlock(&procp->p_spin); 383 psp->state = 1; /* It stopped */ 384 psp->flags = procp->p_pfsflags; 385 psp->events = procp->p_stops; 386 psp->why = procp->p_stype; /* why it stopped */ 387 psp->val = procp->p_xstat; /* any extra info */ 388 break; 389 case PIOCCONT: /* Restart a proc */ 390 /* 391 * NOTE: syscall entry deals with stopevents and may run without 392 * the MP lock. However, the caller is presumably interlocked 393 * by having waited. 394 */ 395 if (procp->p_step == 0) { 396 error = EINVAL; /* Can only start a stopped process */ 397 goto done; 398 } 399 if ((signo = *(int*)ap->a_data) != 0) { 400 if (signo >= NSIG || signo <= 0) { 401 error = EINVAL; 402 goto done; 403 } 404 ksignal(procp, signo); 405 } 406 procp->p_step = 0; 407 wakeup(&procp->p_step); 408 break; 409 default: 410 error = ENOTTY; 411 goto done; 412 } 413 error = 0; 414 done: 415 pfs_pdone(procp); 416 return error; 417 } 418 419 /* 420 * do block mapping for pfsnode (vp). 421 * since we don't use the buffer cache 422 * for procfs this function should never 423 * be called. in any case, it's not clear 424 * what part of the kernel ever makes use 425 * of this function. for sanity, this is the 426 * usual no-op bmap, although returning 427 * (EIO) would be a reasonable alternative. 428 * 429 * XXX mmap assumes buffer cache operation 430 * 431 * procfs_bmap(struct vnode *a_vp, off_t a_loffset, 432 * off_t *a_doffsetp, int *a_runp, int *a_runb) 433 */ 434 static int 435 procfs_bmap(struct vop_bmap_args *ap) 436 { 437 if (ap->a_doffsetp != NULL) 438 *ap->a_doffsetp = ap->a_loffset; 439 if (ap->a_runp != NULL) 440 *ap->a_runp = 0; 441 if (ap->a_runb != NULL) 442 *ap->a_runb = 0; 443 return (0); 444 } 445 446 /* 447 * procfs_inactive is called when the pfsnode 448 * is vrele'd and the reference count goes 449 * to zero. (vp) will be on the vnode free 450 * list, so to get it back vget() must be 451 * used. 452 * 453 * (vp) is locked on entry, but must be unlocked on exit. 454 * 455 * procfs_inactive(struct vnode *a_vp) 456 */ 457 static int 458 procfs_inactive(struct vop_inactive_args *ap) 459 { 460 struct pfsnode *pfs = VTOPFS(ap->a_vp); 461 462 if (pfs->pfs_pid & PFS_DEAD) 463 vrecycle(ap->a_vp); 464 return (0); 465 } 466 467 /* 468 * _reclaim is called when getnewvnode() 469 * wants to make use of an entry on the vnode 470 * free list. at this time the filesystem needs 471 * to free any private data and remove the node 472 * from any private lists. 473 * 474 * procfs_reclaim(struct vnode *a_vp) 475 */ 476 static int 477 procfs_reclaim(struct vop_reclaim_args *ap) 478 { 479 return (procfs_freevp(ap->a_vp)); 480 } 481 482 /* 483 * _print is used for debugging. 484 * just print a readable description 485 * of (vp). 486 * 487 * procfs_print(struct vnode *a_vp) 488 */ 489 static int 490 procfs_print(struct vop_print_args *ap) 491 { 492 struct pfsnode *pfs = VTOPFS(ap->a_vp); 493 494 kprintf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n", 495 pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 496 return (0); 497 } 498 499 /* 500 * generic entry point for unsupported operations 501 */ 502 static int 503 procfs_badop(struct vop_generic_args *ap) 504 { 505 return (EIO); 506 } 507 508 /* 509 * Invent attributes for pfsnode (vp) and store 510 * them in (vap). 511 * Directories lengths are returned as zero since 512 * any real length would require the genuine size 513 * to be computed, and nothing cares anyway. 514 * 515 * this is relatively minimal for procfs. 516 * 517 * procfs_getattr(struct vnode *a_vp, struct vattr *a_vap) 518 */ 519 static int 520 procfs_getattr(struct vop_getattr_args *ap) 521 { 522 struct pfsnode *pfs = VTOPFS(ap->a_vp); 523 struct vattr *vap = ap->a_vap; 524 struct proc *procp; 525 int error; 526 527 /* 528 * First make sure that the process and its credentials 529 * still exist. 530 */ 531 switch (pfs->pfs_type) { 532 case Proot: 533 case Pcurproc: 534 procp = NULL; 535 break; 536 default: 537 procp = pfs_pfind(pfs->pfs_pid); 538 if (procp == NULL || procp->p_ucred == NULL) { 539 error = ENOENT; 540 goto done; 541 } 542 break; 543 } 544 545 error = 0; 546 547 /* start by zeroing out the attributes */ 548 VATTR_NULL(vap); 549 550 /* next do all the common fields */ 551 vap->va_type = ap->a_vp->v_type; 552 vap->va_mode = pfs->pfs_mode; 553 vap->va_fileid = pfs->pfs_fileno; 554 vap->va_flags = 0; 555 vap->va_blocksize = PAGE_SIZE; 556 vap->va_bytes = vap->va_size = 0; 557 vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; 558 559 /* 560 * Make all times be current TOD. 561 * It would be possible to get the process start 562 * time from the p_stat structure, but there's 563 * no "file creation" time stamp anyway, and the 564 * p_stat structure is not addressible if u. gets 565 * swapped out for that process. 566 */ 567 vfs_timestamp(&vap->va_ctime); 568 vap->va_atime = vap->va_mtime = vap->va_ctime; 569 570 /* 571 * If the process has exercised some setuid or setgid 572 * privilege, then rip away read/write permission so 573 * that only root can gain access. 574 */ 575 switch (pfs->pfs_type) { 576 case Pctl: 577 case Pregs: 578 case Pfpregs: 579 case Pdbregs: 580 case Pmem: 581 if (procp->p_flags & P_SUGID) { 582 vap->va_mode &= ~((VREAD|VWRITE)| 583 ((VREAD|VWRITE)>>3)| 584 ((VREAD|VWRITE)>>6)); 585 } 586 break; 587 default: 588 break; 589 } 590 591 /* 592 * now do the object specific fields 593 * 594 * The size could be set from struct reg, but it's hardly 595 * worth the trouble, and it puts some (potentially) machine 596 * dependent data into this machine-independent code. If it 597 * becomes important then this function should break out into 598 * a per-file stat function in the corresponding .c file. 599 */ 600 601 vap->va_nlink = 1; 602 if (procp) { 603 if (procp->p_ucred) { 604 vap->va_uid = procp->p_ucred->cr_uid; 605 vap->va_gid = procp->p_ucred->cr_gid; 606 } else { 607 vap->va_uid = -1; 608 vap->va_gid = -1; 609 } 610 } 611 612 switch (pfs->pfs_type) { 613 case Proot: 614 /* 615 * Set nlink to 1 to tell fts(3) we don't actually know. 616 */ 617 vap->va_nlink = 1; 618 vap->va_uid = 0; 619 vap->va_gid = 0; 620 vap->va_size = vap->va_bytes = DEV_BSIZE; 621 break; 622 623 case Pcurproc: { 624 char buf[16]; /* should be enough */ 625 626 vap->va_uid = 0; 627 vap->va_gid = 0; 628 vap->va_size = ksnprintf(buf, sizeof(buf), 629 "%ld", (long)curproc->p_pid); 630 vap->va_bytes = vap->va_size; 631 break; 632 } 633 634 case Pproc: 635 vap->va_nlink = nproc_targets; 636 vap->va_size = vap->va_bytes = DEV_BSIZE; 637 break; 638 639 case Pfile: { 640 char *fullpath, *freepath; 641 642 if (procp->p_textnch.ncp) { 643 struct nchandle nch; 644 645 cache_copy(&procp->p_textnch, &nch); 646 error = cache_fullpath(procp, &nch, NULL, 647 &fullpath, &freepath, 0); 648 cache_drop(&nch); 649 } else { 650 error = EINVAL; 651 } 652 653 if (error == 0) { 654 vap->va_size = strlen(fullpath); 655 kfree(freepath, M_TEMP); 656 } else { 657 vap->va_size = sizeof("unknown") - 1; 658 error = 0; 659 } 660 vap->va_bytes = vap->va_size; 661 break; 662 } 663 664 case Pmem: 665 /* 666 * If we denied owner access earlier, then we have to 667 * change the owner to root - otherwise 'ps' and friends 668 * will break even though they are setgid kmem. *SIGH* 669 */ 670 if (procp->p_flags & P_SUGID) 671 vap->va_uid = 0; 672 else if (procp->p_ucred) 673 vap->va_uid = procp->p_ucred->cr_uid; 674 else 675 vap->va_uid = -1; 676 break; 677 678 case Pregs: 679 vap->va_bytes = vap->va_size = sizeof(struct reg); 680 break; 681 682 case Pfpregs: 683 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 684 break; 685 686 case Pdbregs: 687 vap->va_bytes = vap->va_size = sizeof(struct dbreg); 688 break; 689 690 case Ptype: 691 case Pmap: 692 case Pctl: 693 case Pstatus: 694 case Pnote: 695 case Pnotepg: 696 case Pcmdline: 697 case Prlimit: 698 break; 699 700 default: 701 panic("procfs_getattr"); 702 } 703 done: 704 pfs_pdone(procp); 705 return (error); 706 } 707 708 /* 709 * procfs_setattr(struct vnode *a_vp, struct vattr *a_vap, 710 * struct ucred *a_cred) 711 */ 712 static int 713 procfs_setattr(struct vop_setattr_args *ap) 714 { 715 if (ap->a_vap->va_flags != VNOVAL) 716 return (EOPNOTSUPP); 717 718 /* 719 * just fake out attribute setting 720 * it's not good to generate an error 721 * return, otherwise things like creat() 722 * will fail when they try to set the 723 * file length to 0. worse, this means 724 * that echo $note > /proc/$pid/note will fail. 725 */ 726 727 return (0); 728 } 729 730 /* 731 * implement access checking. 732 * 733 * procfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred) 734 */ 735 static int 736 procfs_access(struct vop_access_args *ap) 737 { 738 struct vattr vattr; 739 int error; 740 741 error = VOP_GETATTR(ap->a_vp, &vattr); 742 if (!error) 743 error = vop_helper_access(ap, vattr.va_uid, vattr.va_gid, 744 vattr.va_mode, 0); 745 return (error); 746 } 747 748 /* 749 * lookup. this is incredibly complicated in the general case, however 750 * for most pseudo-filesystems very little needs to be done. 751 * 752 * procfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp, 753 * struct componentname *a_cnp) 754 */ 755 static int 756 procfs_lookup(struct vop_old_lookup_args *ap) 757 { 758 struct componentname *cnp = ap->a_cnp; 759 struct vnode **vpp = ap->a_vpp; 760 struct vnode *dvp = ap->a_dvp; 761 char *pname = cnp->cn_nameptr; 762 /* struct proc *curp = cnp->cn_proc; */ 763 struct proc_target *pt; 764 pid_t pid; 765 struct pfsnode *pfs; 766 struct proc *p; 767 struct lwp *lp; 768 int i; 769 int error; 770 771 *vpp = NULL; 772 773 if (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME) 774 return (EROFS); 775 776 p = NULL; 777 error = 0; 778 if (cnp->cn_namelen == 1 && *pname == '.') { 779 *vpp = dvp; 780 vref(*vpp); 781 goto out; 782 } 783 784 pfs = VTOPFS(dvp); 785 switch (pfs->pfs_type) { 786 case Proot: 787 if (cnp->cn_flags & CNP_ISDOTDOT) 788 return (EIO); 789 790 if (CNEQ(cnp, "curproc", 7)) { 791 error = procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc); 792 goto out; 793 } 794 795 pid = atopid(pname, cnp->cn_namelen); 796 if (pid == NO_PID) 797 break; 798 799 p = pfs_pfind(pid); 800 if (p == NULL) 801 break; 802 803 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred)) 804 break; 805 806 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 && 807 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid) 808 break; 809 810 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc); 811 goto out; 812 813 case Pproc: 814 if (cnp->cn_flags & CNP_ISDOTDOT) { 815 error = procfs_root(dvp->v_mount, vpp); 816 goto out; 817 } 818 819 p = pfs_pfind(pfs->pfs_pid); 820 if (p == NULL) 821 break; 822 /* XXX lwp */ 823 lp = FIRST_LWP_IN_PROC(p); 824 if (lp == NULL) 825 break; 826 827 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred)) 828 break; 829 830 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 && 831 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid) 832 break; 833 834 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 835 if (cnp->cn_namelen == pt->pt_namlen && 836 bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 837 (pt->pt_valid == NULL || (*pt->pt_valid)(lp))) 838 goto found; 839 } 840 break; 841 found: 842 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 843 pt->pt_pfstype); 844 goto out; 845 846 default: 847 error = ENOTDIR; 848 goto out; 849 } 850 if (cnp->cn_nameiop == NAMEI_LOOKUP) 851 error = ENOENT; 852 else 853 error = EROFS; 854 /* 855 * If no error occured *vpp will hold a referenced locked vnode. 856 * dvp was passed to us locked and *vpp must be returned locked. 857 * If *vpp != dvp then we should unlock dvp if (1) this is not the 858 * last component or (2) CNP_LOCKPARENT is not set. 859 */ 860 out: 861 if (error == 0 && *vpp != dvp) { 862 if ((cnp->cn_flags & CNP_LOCKPARENT) == 0) { 863 cnp->cn_flags |= CNP_PDIRUNLOCK; 864 vn_unlock(dvp); 865 } 866 } 867 pfs_pdone(p); 868 return (error); 869 } 870 871 /* 872 * Does this process have a text file? 873 */ 874 int 875 procfs_validfile(struct lwp *lp) 876 { 877 return (procfs_findtextvp(lp->lwp_proc) != NULLVP); 878 } 879 880 /* 881 * readdir() returns directory entries from pfsnode (vp). 882 * 883 * We generate just one directory entry at a time, as it would probably 884 * not pay off to buffer several entries locally to save uiomove calls. 885 * 886 * procfs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred, 887 * int *a_eofflag, int *a_ncookies, off_t **a_cookies) 888 */ 889 static int 890 procfs_readdir(struct vop_readdir_args *ap) 891 { 892 struct pfsnode *pfs; 893 int error; 894 895 if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX) 896 return (EINVAL); 897 error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 898 if (error) 899 return (error); 900 pfs = VTOPFS(ap->a_vp); 901 902 switch (pfs->pfs_type) { 903 case Pproc: 904 /* 905 * this is for the process-specific sub-directories. 906 * all that is needed to is copy out all the entries 907 * from the procent[] table (top of this file). 908 */ 909 error = procfs_readdir_proc(ap); 910 break; 911 case Proot: 912 /* 913 * this is for the root of the procfs filesystem 914 * what is needed is a special entry for "curproc" 915 * followed by an entry for each process on allproc 916 */ 917 error = procfs_readdir_root(ap); 918 break; 919 default: 920 error = ENOTDIR; 921 break; 922 } 923 924 vn_unlock(ap->a_vp); 925 return (error); 926 } 927 928 static int 929 procfs_readdir_proc(struct vop_readdir_args *ap) 930 { 931 struct pfsnode *pfs; 932 int error, i, retval; 933 struct proc *p; 934 struct lwp *lp; 935 struct proc_target *pt; 936 struct uio *uio = ap->a_uio; 937 938 pfs = VTOPFS(ap->a_vp); 939 p = pfs_pfind(pfs->pfs_pid); 940 if (p == NULL) 941 return(0); 942 if (!PRISON_CHECK(ap->a_cred, p->p_ucred)) { 943 error = 0; 944 goto done; 945 } 946 /* XXX lwp, not MPSAFE */ 947 lp = FIRST_LWP_IN_PROC(p); 948 if (lp == NULL) { 949 error = EINVAL; 950 goto done; 951 } 952 953 error = 0; 954 i = (int)uio->uio_offset; 955 if (i < 0) { 956 error = EINVAL; 957 goto done; 958 } 959 960 for (pt = &proc_targets[i]; 961 !error && uio->uio_resid > 0 && i < nproc_targets; pt++, i++) { 962 if (pt->pt_valid && (*pt->pt_valid)(lp) == 0) 963 continue; 964 965 retval = vop_write_dirent(&error, uio, 966 PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype), pt->pt_type, 967 pt->pt_namlen, pt->pt_name); 968 if (retval) 969 break; 970 } 971 972 uio->uio_offset = (off_t)i; 973 error = 0; 974 done: 975 pfs_pdone(p); 976 return error; 977 } 978 979 struct procfs_readdir_root_info { 980 int error; 981 int i; 982 int pcnt; 983 struct uio *uio; 984 struct ucred *cred; 985 }; 986 987 static int procfs_readdir_root_callback(struct proc *p, void *data); 988 989 static int 990 procfs_readdir_root(struct vop_readdir_args *ap) 991 { 992 struct procfs_readdir_root_info info; 993 struct uio *uio = ap->a_uio; 994 int res; 995 996 res = 0; 997 info.error = 0; 998 info.i = (int)uio->uio_offset; 999 1000 if (info.i < 0) 1001 return (EINVAL); 1002 1003 info.pcnt = 0; 1004 info.uio = uio; 1005 info.cred = ap->a_cred; 1006 while (info.pcnt < 3) { 1007 res = procfs_readdir_root_callback(NULL, &info); 1008 if (res < 0) 1009 break; 1010 } 1011 if (res >= 0) 1012 allproc_scan(procfs_readdir_root_callback, &info, 0); 1013 uio->uio_offset = (off_t)info.i; 1014 1015 return (info.error); 1016 } 1017 1018 static int 1019 procfs_readdir_root_callback(struct proc *p, void *data) 1020 { 1021 struct procfs_readdir_root_info *info = data; 1022 struct uio *uio; 1023 int retval; 1024 ino_t d_ino; 1025 const char *d_name; 1026 char d_name_pid[20]; 1027 size_t d_namlen; 1028 uint8_t d_type; 1029 1030 uio = info->uio; 1031 1032 if (uio->uio_resid <= 0 || info->error) 1033 return(-1); 1034 1035 switch (info->pcnt) { 1036 case 0: /* `.' */ 1037 d_ino = PROCFS_FILENO(0, Proot); 1038 d_name = "."; 1039 d_namlen = 1; 1040 d_type = DT_DIR; 1041 break; 1042 case 1: /* `..' */ 1043 d_ino = PROCFS_FILENO(0, Proot); 1044 d_name = ".."; 1045 d_namlen = 2; 1046 d_type = DT_DIR; 1047 break; 1048 1049 case 2: 1050 d_ino = PROCFS_FILENO(0, Pcurproc); 1051 d_namlen = 7; 1052 d_name = "curproc"; 1053 d_type = DT_LNK; 1054 break; 1055 1056 1057 default: 1058 if (!PRISON_CHECK(info->cred, p->p_ucred)) 1059 return(0); 1060 if (ps_showallprocs == 0 && 1061 info->cred->cr_uid != 0 && 1062 info->cred->cr_uid != p->p_ucred->cr_uid) { 1063 return(0); 1064 } 1065 1066 /* 1067 * Skip entries we have already returned (optimization) 1068 */ 1069 if (info->pcnt < info->i) { 1070 ++info->pcnt; 1071 return(0); 1072 } 1073 1074 d_ino = PROCFS_FILENO(p->p_pid, Pproc); 1075 d_namlen = ksnprintf(d_name_pid, sizeof(d_name_pid), 1076 "%ld", (long)p->p_pid); 1077 d_name = d_name_pid; 1078 d_type = DT_DIR; 1079 break; 1080 } 1081 1082 /* 1083 * Skip entries we have already returned (optimization) 1084 */ 1085 if (info->pcnt < info->i) { 1086 ++info->pcnt; 1087 return(0); 1088 } 1089 1090 retval = vop_write_dirent(&info->error, uio, 1091 d_ino, d_type, d_namlen, d_name); 1092 if (retval) 1093 return(-1); 1094 ++info->pcnt; 1095 ++info->i; 1096 return(0); 1097 } 1098 1099 /* 1100 * readlink reads the link of `curproc' or `file' 1101 */ 1102 static int 1103 procfs_readlink(struct vop_readlink_args *ap) 1104 { 1105 char buf[16]; /* should be enough */ 1106 struct proc *procp; 1107 struct vnode *vp = ap->a_vp; 1108 struct pfsnode *pfs = VTOPFS(vp); 1109 char *fullpath, *freepath; 1110 int error, len; 1111 1112 switch (pfs->pfs_type) { 1113 case Pcurproc: 1114 if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc)) 1115 return (EINVAL); 1116 1117 len = ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid); 1118 1119 return (uiomove(buf, len, ap->a_uio)); 1120 case Pfile: 1121 /* 1122 * procfs's directory topology is somewhat asynchronous from 1123 * reality so it is possible for pid requests to race exiting 1124 * processes. In this situation, bit 31 is set in 1125 * pfs->pfs_pid which guarantees that pfs_pfind() will return 1126 * NULL. 1127 * 1128 * It is also possible to catch a process in the middle of 1129 * an exit sequence so various fields might wind up being 1130 * NULL that are not normally NULL. 1131 */ 1132 procp = pfs_pfind(pfs->pfs_pid); 1133 if (procp == NULL || procp->p_ucred == NULL) { 1134 pfs_pdone(procp); 1135 return (uiomove("unknown", sizeof("unknown") - 1, 1136 ap->a_uio)); 1137 } 1138 if (procp->p_textnch.ncp) { 1139 struct nchandle nch; 1140 1141 cache_copy(&procp->p_textnch, &nch); 1142 error = cache_fullpath(procp, &nch, NULL, 1143 &fullpath, &freepath, 0); 1144 cache_drop(&nch); 1145 } else { 1146 error = EINVAL; 1147 } 1148 1149 if (error != 0) { 1150 pfs_pdone(procp); 1151 return (uiomove("unknown", sizeof("unknown") - 1, 1152 ap->a_uio)); 1153 } 1154 error = uiomove(fullpath, strlen(fullpath), ap->a_uio); 1155 kfree(freepath, M_TEMP); 1156 pfs_pdone(procp); 1157 return (error); 1158 default: 1159 return (EINVAL); 1160 } 1161 } 1162 1163 /* 1164 * convert decimal ascii to pid_t 1165 */ 1166 static pid_t 1167 atopid(const char *b, u_int len) 1168 { 1169 pid_t p = 0; 1170 1171 while (len--) { 1172 char c = *b++; 1173 if (c < '0' || c > '9') 1174 return (NO_PID); 1175 p = 10 * p + (c - '0'); 1176 if (p > PID_MAX) 1177 return (NO_PID); 1178 } 1179 1180 return (p); 1181 } 1182 1183