1 /* 2 * Copyright (c) 1993, 1995 Jan-Simon Pendry 3 * Copyright (c) 1993, 1995 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 34 * 35 * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $ 36 */ 37 38 /* 39 * procfs vnode interface 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/time.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/fcntl.h> 48 #include <sys/proc.h> 49 #include <sys/priv.h> 50 #include <sys/signalvar.h> 51 #include <sys/vnode.h> 52 #include <sys/uio.h> 53 #include <sys/mount.h> 54 #include <sys/namei.h> 55 #include <sys/dirent.h> 56 #include <sys/malloc.h> 57 #include <sys/reg.h> 58 #include <vm/vm_zone.h> 59 #include <vfs/procfs/procfs.h> 60 #include <sys/pioctl.h> 61 62 #include <sys/spinlock2.h> 63 64 #include <machine/limits.h> 65 66 static int procfs_access (struct vop_access_args *); 67 static int procfs_badop (struct vop_generic_args *); 68 static int procfs_bmap (struct vop_bmap_args *); 69 static int procfs_close (struct vop_close_args *); 70 static int procfs_getattr (struct vop_getattr_args *); 71 static int procfs_inactive (struct vop_inactive_args *); 72 static int procfs_ioctl (struct vop_ioctl_args *); 73 static int procfs_lookup (struct vop_old_lookup_args *); 74 static int procfs_open (struct vop_open_args *); 75 static int procfs_print (struct vop_print_args *); 76 static int procfs_readdir (struct vop_readdir_args *); 77 static int procfs_readlink (struct vop_readlink_args *); 78 static int procfs_reclaim (struct vop_reclaim_args *); 79 static int procfs_setattr (struct vop_setattr_args *); 80 81 static int procfs_readdir_proc(struct vop_readdir_args *); 82 static int procfs_readdir_root(struct vop_readdir_args *); 83 84 /* 85 * procfs vnode operations. 86 */ 87 struct vop_ops procfs_vnode_vops = { 88 .vop_default = vop_defaultop, 89 .vop_access = procfs_access, 90 .vop_advlock = (void *)procfs_badop, 91 .vop_bmap = procfs_bmap, 92 .vop_close = procfs_close, 93 .vop_old_create = (void *)procfs_badop, 94 .vop_getattr = procfs_getattr, 95 .vop_inactive = procfs_inactive, 96 .vop_old_link = (void *)procfs_badop, 97 .vop_old_lookup = procfs_lookup, 98 .vop_old_mkdir = (void *)procfs_badop, 99 .vop_old_mknod = (void *)procfs_badop, 100 .vop_open = procfs_open, 101 .vop_pathconf = vop_stdpathconf, 102 .vop_print = procfs_print, 103 .vop_read = procfs_rw, 104 .vop_readdir = procfs_readdir, 105 .vop_readlink = procfs_readlink, 106 .vop_reclaim = procfs_reclaim, 107 .vop_old_remove = (void *)procfs_badop, 108 .vop_old_rename = (void *)procfs_badop, 109 .vop_old_rmdir = (void *)procfs_badop, 110 .vop_setattr = procfs_setattr, 111 .vop_old_symlink = (void *)procfs_badop, 112 .vop_write = (void *)procfs_rw, 113 .vop_ioctl = procfs_ioctl 114 }; 115 116 117 /* 118 * This is a list of the valid names in the 119 * process-specific sub-directories. It is 120 * used in procfs_lookup and procfs_readdir 121 */ 122 static struct proc_target { 123 u_char pt_type; 124 u_char pt_namlen; 125 char *pt_name; 126 pfstype pt_pfstype; 127 int (*pt_valid) (struct lwp *p); 128 } proc_targets[] = { 129 #define N(s) sizeof(s)-1, s 130 /* name type validp */ 131 { DT_DIR, N("."), Pproc, NULL }, 132 { DT_DIR, N(".."), Proot, NULL }, 133 { DT_REG, N("mem"), Pmem, NULL }, 134 { DT_REG, N("regs"), Pregs, procfs_validregs }, 135 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 136 { DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs }, 137 { DT_REG, N("ctl"), Pctl, NULL }, 138 { DT_REG, N("status"), Pstatus, NULL }, 139 { DT_REG, N("note"), Pnote, NULL }, 140 { DT_REG, N("notepg"), Pnotepg, NULL }, 141 { DT_REG, N("map"), Pmap, procfs_validmap }, 142 { DT_REG, N("etype"), Ptype, procfs_validtype }, 143 { DT_REG, N("cmdline"), Pcmdline, NULL }, 144 { DT_REG, N("rlimit"), Prlimit, NULL }, 145 { DT_LNK, N("file"), Pfile, NULL }, 146 #undef N 147 }; 148 static const int nproc_targets = NELEM(proc_targets); 149 150 static pid_t atopid (const char *, u_int); 151 152 /* 153 * set things up for doing i/o on 154 * the pfsnode (vp). (vp) is locked 155 * on entry, and should be left locked 156 * on exit. 157 * 158 * for procfs we don't need to do anything 159 * in particular for i/o. all that is done 160 * is to support exclusive open on process 161 * memory images. 162 * 163 * procfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred, 164 * struct file *a_fp) 165 */ 166 static int 167 procfs_open(struct vop_open_args *ap) 168 { 169 struct pfsnode *pfs = VTOPFS(ap->a_vp); 170 struct proc *p1, *p2; 171 int error; 172 173 p2 = pfs_pfind(pfs->pfs_pid); 174 if (p2 == NULL) 175 return (ENOENT); 176 if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred)) { 177 error = ENOENT; 178 goto done; 179 } 180 181 switch (pfs->pfs_type) { 182 case Pmem: 183 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 184 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) { 185 error = EBUSY; 186 goto done; 187 } 188 189 p1 = curproc; 190 KKASSERT(p1); 191 /* Can't trace a process that's currently exec'ing. */ 192 if ((p2->p_flags & P_INEXEC) != 0) { 193 error = EAGAIN; 194 goto done; 195 } 196 if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred)) { 197 error = EPERM; 198 goto done; 199 } 200 201 if (ap->a_mode & FWRITE) 202 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 203 204 break; 205 206 default: 207 break; 208 } 209 error = vop_stdopen(ap); 210 done: 211 pfs_pdone(p2); 212 return error; 213 } 214 215 /* 216 * close the pfsnode (vp) after doing i/o. 217 * (vp) is not locked on entry or exit. 218 * 219 * nothing to do for procfs other than undo 220 * any exclusive open flag (see _open above). 221 * 222 * procfs_close(struct vnode *a_vp, int a_fflag, struct ucred *a_cred) 223 */ 224 static int 225 procfs_close(struct vop_close_args *ap) 226 { 227 struct pfsnode *pfs = VTOPFS(ap->a_vp); 228 struct proc *p; 229 230 /* 231 * Make sure the lock is exclusive for opencount tests 232 */ 233 vn_lock(ap->a_vp, LK_UPGRADE | LK_RETRY); 234 235 switch (pfs->pfs_type) { 236 case Pmem: 237 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 238 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 239 /* 240 * v_opencount determines the last real close on the vnode. 241 * 242 * If this is the last close, then it checks to see if 243 * the target process has PF_LINGER set in p_pfsflags, 244 * if this is *not* the case, then the process' stop flags 245 * are cleared, and the process is woken up. This is 246 * to help prevent the case where a process has been 247 * told to stop on an event, but then the requesting process 248 * has gone away or forgotten about it. 249 */ 250 p = NULL; 251 if ((ap->a_vp->v_opencount < 2) 252 && ((p = pfs_pfind(pfs->pfs_pid)) != NULL || 253 (p = pfs_zpfind(pfs->pfs_pid)) != NULL) 254 && !(p->p_pfsflags & PF_LINGER)) { 255 spin_lock(&p->p_spin); 256 p->p_stops = 0; 257 p->p_step = 0; 258 spin_unlock(&p->p_spin); 259 wakeup(&p->p_stype); 260 wakeup(&p->p_step); 261 } 262 pfs_pdone(p); 263 break; 264 default: 265 break; 266 } 267 268 return (vop_stdclose(ap)); 269 } 270 271 /* 272 * do an ioctl operation on a pfsnode (vp). 273 * (vp) is not locked on entry or exit. 274 */ 275 static int 276 procfs_ioctl(struct vop_ioctl_args *ap) 277 { 278 struct pfsnode *pfs = VTOPFS(ap->a_vp); 279 struct proc *procp; 280 struct proc *p; 281 int error; 282 int signo; 283 struct procfs_status *psp; 284 unsigned char flags; 285 286 procp = pfs_pfind(pfs->pfs_pid); 287 if (procp == NULL) 288 return ENOTTY; 289 p = curproc; 290 if (p == NULL) { 291 error = EINVAL; 292 goto done; 293 } 294 295 /* Can't trace a process that's currently exec'ing. */ 296 if ((procp->p_flags & P_INEXEC) != 0) { 297 error = EAGAIN; 298 goto done; 299 } 300 if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred)) { 301 error = EPERM; 302 goto done; 303 } 304 305 switch (ap->a_command) { 306 case PIOCBIS: 307 spin_lock(&procp->p_spin); 308 procp->p_stops |= *(unsigned int*)ap->a_data; 309 spin_unlock(&procp->p_spin); 310 break; 311 case PIOCBIC: 312 spin_lock(&procp->p_spin); 313 procp->p_stops &= ~*(unsigned int*)ap->a_data; 314 spin_unlock(&procp->p_spin); 315 break; 316 case PIOCSFL: 317 /* 318 * NFLAGS is "non-suser_xxx flags" -- currently, only 319 * PFS_ISUGID ("ignore set u/g id"); 320 */ 321 #define NFLAGS (PF_ISUGID) 322 flags = (unsigned char)*(unsigned int*)ap->a_data; 323 if (flags & NFLAGS && (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0))) 324 goto done; 325 procp->p_pfsflags = flags; 326 break; 327 case PIOCGFL: 328 *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags; 329 break; 330 case PIOCSTATUS: 331 /* 332 * NOTE: syscall entry deals with stopevents and may run without 333 * the MP lock. 334 */ 335 psp = (struct procfs_status *)ap->a_data; 336 psp->flags = procp->p_pfsflags; 337 psp->events = procp->p_stops; 338 spin_lock(&procp->p_spin); 339 if (procp->p_step) { 340 psp->state = 0; 341 psp->why = procp->p_stype; 342 psp->val = procp->p_xstat; 343 spin_unlock(&procp->p_spin); 344 } else { 345 psp->state = 1; 346 spin_unlock(&procp->p_spin); 347 psp->why = 0; /* Not defined values */ 348 psp->val = 0; /* Not defined values */ 349 } 350 break; 351 case PIOCWAIT: 352 /* 353 * NOTE: syscall entry deals with stopevents and may run without 354 * the MP lock. 355 */ 356 psp = (struct procfs_status *)ap->a_data; 357 spin_lock(&procp->p_spin); 358 while (procp->p_step == 0) { 359 tsleep_interlock(&procp->p_stype, PCATCH); 360 spin_unlock(&procp->p_spin); 361 if (procp->p_stops == 0) { 362 error = EINVAL; 363 goto done; 364 } 365 if (procp->p_flags & P_POSTEXIT) { 366 error = EINVAL; 367 goto done; 368 } 369 if (procp->p_flags & P_INEXEC) { 370 error = EAGAIN; 371 goto done; 372 } 373 error = tsleep(&procp->p_stype, PCATCH | PINTERLOCKED, 374 "piocwait", 0); 375 if (error) 376 goto done; 377 spin_lock(&procp->p_spin); 378 } 379 spin_unlock(&procp->p_spin); 380 psp->state = 1; /* It stopped */ 381 psp->flags = procp->p_pfsflags; 382 psp->events = procp->p_stops; 383 psp->why = procp->p_stype; /* why it stopped */ 384 psp->val = procp->p_xstat; /* any extra info */ 385 break; 386 case PIOCCONT: /* Restart a proc */ 387 /* 388 * NOTE: syscall entry deals with stopevents and may run without 389 * the MP lock. However, the caller is presumably interlocked 390 * by having waited. 391 */ 392 if (procp->p_step == 0) { 393 error = EINVAL; /* Can only start a stopped process */ 394 goto done; 395 } 396 if ((signo = *(int*)ap->a_data) != 0) { 397 if (signo >= NSIG || signo <= 0) { 398 error = EINVAL; 399 goto done; 400 } 401 ksignal(procp, signo); 402 } 403 procp->p_step = 0; 404 wakeup(&procp->p_step); 405 break; 406 default: 407 error = ENOTTY; 408 goto done; 409 } 410 error = 0; 411 done: 412 pfs_pdone(procp); 413 return 0; 414 } 415 416 /* 417 * do block mapping for pfsnode (vp). 418 * since we don't use the buffer cache 419 * for procfs this function should never 420 * be called. in any case, it's not clear 421 * what part of the kernel ever makes use 422 * of this function. for sanity, this is the 423 * usual no-op bmap, although returning 424 * (EIO) would be a reasonable alternative. 425 * 426 * XXX mmap assumes buffer cache operation 427 * 428 * procfs_bmap(struct vnode *a_vp, off_t a_loffset, 429 * off_t *a_doffsetp, int *a_runp, int *a_runb) 430 */ 431 static int 432 procfs_bmap(struct vop_bmap_args *ap) 433 { 434 if (ap->a_doffsetp != NULL) 435 *ap->a_doffsetp = ap->a_loffset; 436 if (ap->a_runp != NULL) 437 *ap->a_runp = 0; 438 if (ap->a_runb != NULL) 439 *ap->a_runb = 0; 440 return (0); 441 } 442 443 /* 444 * procfs_inactive is called when the pfsnode 445 * is vrele'd and the reference count goes 446 * to zero. (vp) will be on the vnode free 447 * list, so to get it back vget() must be 448 * used. 449 * 450 * (vp) is locked on entry, but must be unlocked on exit. 451 * 452 * procfs_inactive(struct vnode *a_vp) 453 */ 454 static int 455 procfs_inactive(struct vop_inactive_args *ap) 456 { 457 struct pfsnode *pfs = VTOPFS(ap->a_vp); 458 459 if (pfs->pfs_pid & PFS_DEAD) 460 vrecycle(ap->a_vp); 461 return (0); 462 } 463 464 /* 465 * _reclaim is called when getnewvnode() 466 * wants to make use of an entry on the vnode 467 * free list. at this time the filesystem needs 468 * to free any private data and remove the node 469 * from any private lists. 470 * 471 * procfs_reclaim(struct vnode *a_vp) 472 */ 473 static int 474 procfs_reclaim(struct vop_reclaim_args *ap) 475 { 476 return (procfs_freevp(ap->a_vp)); 477 } 478 479 /* 480 * _print is used for debugging. 481 * just print a readable description 482 * of (vp). 483 * 484 * procfs_print(struct vnode *a_vp) 485 */ 486 static int 487 procfs_print(struct vop_print_args *ap) 488 { 489 struct pfsnode *pfs = VTOPFS(ap->a_vp); 490 491 kprintf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n", 492 pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 493 return (0); 494 } 495 496 /* 497 * generic entry point for unsupported operations 498 */ 499 static int 500 procfs_badop(struct vop_generic_args *ap) 501 { 502 return (EIO); 503 } 504 505 /* 506 * Invent attributes for pfsnode (vp) and store 507 * them in (vap). 508 * Directories lengths are returned as zero since 509 * any real length would require the genuine size 510 * to be computed, and nothing cares anyway. 511 * 512 * this is relatively minimal for procfs. 513 * 514 * procfs_getattr(struct vnode *a_vp, struct vattr *a_vap) 515 */ 516 static int 517 procfs_getattr(struct vop_getattr_args *ap) 518 { 519 struct pfsnode *pfs = VTOPFS(ap->a_vp); 520 struct vattr *vap = ap->a_vap; 521 struct proc *procp; 522 int error; 523 524 /* 525 * First make sure that the process and its credentials 526 * still exist. 527 */ 528 switch (pfs->pfs_type) { 529 case Proot: 530 case Pcurproc: 531 procp = NULL; 532 break; 533 default: 534 procp = pfs_pfind(pfs->pfs_pid); 535 if (procp == NULL || procp->p_ucred == NULL) { 536 error = ENOENT; 537 goto done; 538 } 539 break; 540 } 541 542 error = 0; 543 544 /* start by zeroing out the attributes */ 545 VATTR_NULL(vap); 546 547 /* next do all the common fields */ 548 vap->va_type = ap->a_vp->v_type; 549 vap->va_mode = pfs->pfs_mode; 550 vap->va_fileid = pfs->pfs_fileno; 551 vap->va_flags = 0; 552 vap->va_blocksize = PAGE_SIZE; 553 vap->va_bytes = vap->va_size = 0; 554 vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; 555 556 /* 557 * Make all times be current TOD. 558 * It would be possible to get the process start 559 * time from the p_stat structure, but there's 560 * no "file creation" time stamp anyway, and the 561 * p_stat structure is not addressible if u. gets 562 * swapped out for that process. 563 */ 564 nanotime(&vap->va_ctime); 565 vap->va_atime = vap->va_mtime = vap->va_ctime; 566 567 /* 568 * If the process has exercised some setuid or setgid 569 * privilege, then rip away read/write permission so 570 * that only root can gain access. 571 */ 572 switch (pfs->pfs_type) { 573 case Pctl: 574 case Pregs: 575 case Pfpregs: 576 case Pdbregs: 577 case Pmem: 578 if (procp->p_flags & P_SUGID) { 579 vap->va_mode &= ~((VREAD|VWRITE)| 580 ((VREAD|VWRITE)>>3)| 581 ((VREAD|VWRITE)>>6)); 582 } 583 break; 584 default: 585 break; 586 } 587 588 /* 589 * now do the object specific fields 590 * 591 * The size could be set from struct reg, but it's hardly 592 * worth the trouble, and it puts some (potentially) machine 593 * dependent data into this machine-independent code. If it 594 * becomes important then this function should break out into 595 * a per-file stat function in the corresponding .c file. 596 */ 597 598 vap->va_nlink = 1; 599 if (procp) { 600 if (procp->p_ucred) { 601 vap->va_uid = procp->p_ucred->cr_uid; 602 vap->va_gid = procp->p_ucred->cr_gid; 603 } else { 604 vap->va_uid = -1; 605 vap->va_gid = -1; 606 } 607 } 608 609 switch (pfs->pfs_type) { 610 case Proot: 611 /* 612 * Set nlink to 1 to tell fts(3) we don't actually know. 613 */ 614 vap->va_nlink = 1; 615 vap->va_uid = 0; 616 vap->va_gid = 0; 617 vap->va_size = vap->va_bytes = DEV_BSIZE; 618 break; 619 620 case Pcurproc: { 621 char buf[16]; /* should be enough */ 622 623 vap->va_uid = 0; 624 vap->va_gid = 0; 625 vap->va_size = ksnprintf(buf, sizeof(buf), 626 "%ld", (long)curproc->p_pid); 627 vap->va_bytes = vap->va_size; 628 break; 629 } 630 631 case Pproc: 632 vap->va_nlink = nproc_targets; 633 vap->va_size = vap->va_bytes = DEV_BSIZE; 634 break; 635 636 case Pfile: { 637 char *fullpath, *freepath; 638 639 if (procp->p_textnch.ncp) { 640 struct nchandle nch; 641 642 cache_copy(&procp->p_textnch, &nch); 643 error = cache_fullpath(procp, &nch, NULL, 644 &fullpath, &freepath, 0); 645 cache_drop(&nch); 646 } else { 647 error = EINVAL; 648 } 649 650 if (error == 0) { 651 vap->va_size = strlen(fullpath); 652 kfree(freepath, M_TEMP); 653 } else { 654 vap->va_size = sizeof("unknown") - 1; 655 error = 0; 656 } 657 vap->va_bytes = vap->va_size; 658 break; 659 } 660 661 case Pmem: 662 /* 663 * If we denied owner access earlier, then we have to 664 * change the owner to root - otherwise 'ps' and friends 665 * will break even though they are setgid kmem. *SIGH* 666 */ 667 if (procp->p_flags & P_SUGID) 668 vap->va_uid = 0; 669 else if (procp->p_ucred) 670 vap->va_uid = procp->p_ucred->cr_uid; 671 else 672 vap->va_uid = -1; 673 break; 674 675 case Pregs: 676 vap->va_bytes = vap->va_size = sizeof(struct reg); 677 break; 678 679 case Pfpregs: 680 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 681 break; 682 683 case Pdbregs: 684 vap->va_bytes = vap->va_size = sizeof(struct dbreg); 685 break; 686 687 case Ptype: 688 case Pmap: 689 case Pctl: 690 case Pstatus: 691 case Pnote: 692 case Pnotepg: 693 case Pcmdline: 694 case Prlimit: 695 break; 696 697 default: 698 panic("procfs_getattr"); 699 } 700 done: 701 pfs_pdone(procp); 702 return (error); 703 } 704 705 /* 706 * procfs_setattr(struct vnode *a_vp, struct vattr *a_vap, 707 * struct ucred *a_cred) 708 */ 709 static int 710 procfs_setattr(struct vop_setattr_args *ap) 711 { 712 if (ap->a_vap->va_flags != VNOVAL) 713 return (EOPNOTSUPP); 714 715 /* 716 * just fake out attribute setting 717 * it's not good to generate an error 718 * return, otherwise things like creat() 719 * will fail when they try to set the 720 * file length to 0. worse, this means 721 * that echo $note > /proc/$pid/note will fail. 722 */ 723 724 return (0); 725 } 726 727 /* 728 * implement access checking. 729 * 730 * procfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred) 731 */ 732 static int 733 procfs_access(struct vop_access_args *ap) 734 { 735 struct vattr vattr; 736 int error; 737 738 error = VOP_GETATTR(ap->a_vp, &vattr); 739 if (!error) 740 error = vop_helper_access(ap, vattr.va_uid, vattr.va_gid, 741 vattr.va_mode, 0); 742 return (error); 743 } 744 745 /* 746 * lookup. this is incredibly complicated in the general case, however 747 * for most pseudo-filesystems very little needs to be done. 748 * 749 * procfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp, 750 * struct componentname *a_cnp) 751 */ 752 static int 753 procfs_lookup(struct vop_old_lookup_args *ap) 754 { 755 struct componentname *cnp = ap->a_cnp; 756 struct vnode **vpp = ap->a_vpp; 757 struct vnode *dvp = ap->a_dvp; 758 char *pname = cnp->cn_nameptr; 759 /* struct proc *curp = cnp->cn_proc; */ 760 struct proc_target *pt; 761 pid_t pid; 762 struct pfsnode *pfs; 763 struct proc *p; 764 struct lwp *lp; 765 int i; 766 int error; 767 768 *vpp = NULL; 769 770 if (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME) 771 return (EROFS); 772 773 p = NULL; 774 error = 0; 775 if (cnp->cn_namelen == 1 && *pname == '.') { 776 *vpp = dvp; 777 vref(*vpp); 778 goto out; 779 } 780 781 pfs = VTOPFS(dvp); 782 switch (pfs->pfs_type) { 783 case Proot: 784 if (cnp->cn_flags & CNP_ISDOTDOT) 785 return (EIO); 786 787 if (CNEQ(cnp, "curproc", 7)) { 788 error = procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc); 789 goto out; 790 } 791 792 pid = atopid(pname, cnp->cn_namelen); 793 if (pid == NO_PID) 794 break; 795 796 p = pfs_pfind(pid); 797 if (p == NULL) 798 break; 799 800 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred)) 801 break; 802 803 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 && 804 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid) 805 break; 806 807 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc); 808 goto out; 809 810 case Pproc: 811 if (cnp->cn_flags & CNP_ISDOTDOT) { 812 error = procfs_root(dvp->v_mount, vpp); 813 goto out; 814 } 815 816 p = pfs_pfind(pfs->pfs_pid); 817 if (p == NULL) 818 break; 819 /* XXX lwp */ 820 lp = FIRST_LWP_IN_PROC(p); 821 822 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred)) 823 break; 824 825 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 && 826 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid) 827 break; 828 829 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 830 if (cnp->cn_namelen == pt->pt_namlen && 831 bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 832 (pt->pt_valid == NULL || (*pt->pt_valid)(lp))) 833 goto found; 834 } 835 break; 836 found: 837 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 838 pt->pt_pfstype); 839 goto out; 840 841 default: 842 error = ENOTDIR; 843 goto out; 844 } 845 if (cnp->cn_nameiop == NAMEI_LOOKUP) 846 error = ENOENT; 847 else 848 error = EROFS; 849 /* 850 * If no error occured *vpp will hold a referenced locked vnode. 851 * dvp was passed to us locked and *vpp must be returned locked. 852 * If *vpp != dvp then we should unlock dvp if (1) this is not the 853 * last component or (2) CNP_LOCKPARENT is not set. 854 */ 855 out: 856 if (error == 0 && *vpp != dvp) { 857 if ((cnp->cn_flags & CNP_LOCKPARENT) == 0) { 858 cnp->cn_flags |= CNP_PDIRUNLOCK; 859 vn_unlock(dvp); 860 } 861 } 862 pfs_pdone(p); 863 return (error); 864 } 865 866 /* 867 * Does this process have a text file? 868 */ 869 int 870 procfs_validfile(struct lwp *lp) 871 { 872 return (procfs_findtextvp(lp->lwp_proc) != NULLVP); 873 } 874 875 /* 876 * readdir() returns directory entries from pfsnode (vp). 877 * 878 * We generate just one directory entry at a time, as it would probably 879 * not pay off to buffer several entries locally to save uiomove calls. 880 * 881 * procfs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred, 882 * int *a_eofflag, int *a_ncookies, off_t **a_cookies) 883 */ 884 static int 885 procfs_readdir(struct vop_readdir_args *ap) 886 { 887 struct pfsnode *pfs; 888 int error; 889 890 if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX) 891 return (EINVAL); 892 error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 893 if (error) 894 return (error); 895 pfs = VTOPFS(ap->a_vp); 896 897 switch (pfs->pfs_type) { 898 case Pproc: 899 /* 900 * this is for the process-specific sub-directories. 901 * all that is needed to is copy out all the entries 902 * from the procent[] table (top of this file). 903 */ 904 error = procfs_readdir_proc(ap); 905 break; 906 case Proot: 907 /* 908 * this is for the root of the procfs filesystem 909 * what is needed is a special entry for "curproc" 910 * followed by an entry for each process on allproc 911 */ 912 error = procfs_readdir_root(ap); 913 break; 914 default: 915 error = ENOTDIR; 916 break; 917 } 918 919 vn_unlock(ap->a_vp); 920 return (error); 921 } 922 923 static int 924 procfs_readdir_proc(struct vop_readdir_args *ap) 925 { 926 struct pfsnode *pfs; 927 int error, i, retval; 928 struct proc *p; 929 struct lwp *lp; 930 struct proc_target *pt; 931 struct uio *uio = ap->a_uio; 932 933 pfs = VTOPFS(ap->a_vp); 934 p = pfs_pfind(pfs->pfs_pid); 935 if (p == NULL) 936 return(0); 937 if (!PRISON_CHECK(ap->a_cred, p->p_ucred)) { 938 error = 0; 939 goto done; 940 } 941 /* XXX lwp, not MPSAFE */ 942 lp = FIRST_LWP_IN_PROC(p); 943 944 error = 0; 945 i = (int)uio->uio_offset; 946 if (i < 0) { 947 error = EINVAL; 948 goto done; 949 } 950 951 for (pt = &proc_targets[i]; 952 !error && uio->uio_resid > 0 && i < nproc_targets; pt++, i++) { 953 if (pt->pt_valid && (*pt->pt_valid)(lp) == 0) 954 continue; 955 956 retval = vop_write_dirent(&error, uio, 957 PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype), pt->pt_type, 958 pt->pt_namlen, pt->pt_name); 959 if (retval) 960 break; 961 } 962 963 uio->uio_offset = (off_t)i; 964 error = 0; 965 done: 966 pfs_pdone(p); 967 return error; 968 } 969 970 struct procfs_readdir_root_info { 971 int error; 972 int i; 973 int pcnt; 974 struct uio *uio; 975 struct ucred *cred; 976 }; 977 978 static int procfs_readdir_root_callback(struct proc *p, void *data); 979 980 static int 981 procfs_readdir_root(struct vop_readdir_args *ap) 982 { 983 struct procfs_readdir_root_info info; 984 struct uio *uio = ap->a_uio; 985 int res; 986 987 res = 0; 988 info.error = 0; 989 info.i = (int)uio->uio_offset; 990 991 if (info.i < 0) 992 return (EINVAL); 993 994 info.pcnt = 0; 995 info.uio = uio; 996 info.cred = ap->a_cred; 997 while (info.pcnt < 3) { 998 res = procfs_readdir_root_callback(NULL, &info); 999 if (res < 0) 1000 break; 1001 } 1002 if (res >= 0) 1003 allproc_scan(procfs_readdir_root_callback, &info); 1004 uio->uio_offset = (off_t)info.i; 1005 1006 return (info.error); 1007 } 1008 1009 static int 1010 procfs_readdir_root_callback(struct proc *p, void *data) 1011 { 1012 struct procfs_readdir_root_info *info = data; 1013 struct uio *uio; 1014 int retval; 1015 ino_t d_ino; 1016 const char *d_name; 1017 char d_name_pid[20]; 1018 size_t d_namlen; 1019 uint8_t d_type; 1020 1021 uio = info->uio; 1022 1023 if (uio->uio_resid <= 0 || info->error) 1024 return(-1); 1025 1026 switch (info->pcnt) { 1027 case 0: /* `.' */ 1028 d_ino = PROCFS_FILENO(0, Proot); 1029 d_name = "."; 1030 d_namlen = 1; 1031 d_type = DT_DIR; 1032 break; 1033 case 1: /* `..' */ 1034 d_ino = PROCFS_FILENO(0, Proot); 1035 d_name = ".."; 1036 d_namlen = 2; 1037 d_type = DT_DIR; 1038 break; 1039 1040 case 2: 1041 d_ino = PROCFS_FILENO(0, Pcurproc); 1042 d_namlen = 7; 1043 d_name = "curproc"; 1044 d_type = DT_LNK; 1045 break; 1046 1047 1048 default: 1049 if (!PRISON_CHECK(info->cred, p->p_ucred)) 1050 return(0); 1051 if (ps_showallprocs == 0 && 1052 info->cred->cr_uid != 0 && 1053 info->cred->cr_uid != p->p_ucred->cr_uid) { 1054 return(0); 1055 } 1056 1057 /* 1058 * Skip entries we have already returned (optimization) 1059 */ 1060 if (info->pcnt < info->i) { 1061 ++info->pcnt; 1062 return(0); 1063 } 1064 1065 d_ino = PROCFS_FILENO(p->p_pid, Pproc); 1066 d_namlen = ksnprintf(d_name_pid, sizeof(d_name_pid), 1067 "%ld", (long)p->p_pid); 1068 d_name = d_name_pid; 1069 d_type = DT_DIR; 1070 break; 1071 } 1072 1073 /* 1074 * Skip entries we have already returned (optimization) 1075 */ 1076 if (info->pcnt < info->i) { 1077 ++info->pcnt; 1078 return(0); 1079 } 1080 1081 retval = vop_write_dirent(&info->error, uio, 1082 d_ino, d_type, d_namlen, d_name); 1083 if (retval) 1084 return(-1); 1085 ++info->pcnt; 1086 ++info->i; 1087 return(0); 1088 } 1089 1090 /* 1091 * readlink reads the link of `curproc' or `file' 1092 */ 1093 static int 1094 procfs_readlink(struct vop_readlink_args *ap) 1095 { 1096 char buf[16]; /* should be enough */ 1097 struct proc *procp; 1098 struct vnode *vp = ap->a_vp; 1099 struct pfsnode *pfs = VTOPFS(vp); 1100 char *fullpath, *freepath; 1101 int error, len; 1102 1103 switch (pfs->pfs_type) { 1104 case Pcurproc: 1105 if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc)) 1106 return (EINVAL); 1107 1108 len = ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid); 1109 1110 return (uiomove(buf, len, ap->a_uio)); 1111 case Pfile: 1112 /* 1113 * procfs's directory topology is somewhat asynchronous from 1114 * reality so it is possible for pid requests to race exiting 1115 * processes. In this situation, bit 31 is set in 1116 * pfs->pfs_pid which guarantees that pfs_pfind() will return 1117 * NULL. 1118 * 1119 * It is also possible to catch a process in the middle of 1120 * an exit sequence so various fields might wind up being 1121 * NULL that are not normally NULL. 1122 */ 1123 procp = pfs_pfind(pfs->pfs_pid); 1124 if (procp == NULL || procp->p_ucred == NULL) { 1125 pfs_pdone(procp); 1126 return (uiomove("unknown", sizeof("unknown") - 1, 1127 ap->a_uio)); 1128 } 1129 if (procp->p_textnch.ncp) { 1130 struct nchandle nch; 1131 1132 cache_copy(&procp->p_textnch, &nch); 1133 error = cache_fullpath(procp, &nch, NULL, 1134 &fullpath, &freepath, 0); 1135 cache_drop(&nch); 1136 } else { 1137 error = EINVAL; 1138 } 1139 1140 if (error != 0) { 1141 pfs_pdone(procp); 1142 return (uiomove("unknown", sizeof("unknown") - 1, 1143 ap->a_uio)); 1144 } 1145 error = uiomove(fullpath, strlen(fullpath), ap->a_uio); 1146 kfree(freepath, M_TEMP); 1147 pfs_pdone(procp); 1148 return (error); 1149 default: 1150 return (EINVAL); 1151 } 1152 } 1153 1154 /* 1155 * convert decimal ascii to pid_t 1156 */ 1157 static pid_t 1158 atopid(const char *b, u_int len) 1159 { 1160 pid_t p = 0; 1161 1162 while (len--) { 1163 char c = *b++; 1164 if (c < '0' || c > '9') 1165 return (NO_PID); 1166 p = 10 * p + (c - '0'); 1167 if (p > PID_MAX) 1168 return (NO_PID); 1169 } 1170 1171 return (p); 1172 } 1173 1174