1 /* 2 * Copyright (c) 1993, 1995 Jan-Simon Pendry 3 * Copyright (c) 1993, 1995 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 38 * 39 * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $ 40 * $DragonFly: src/sys/vfs/procfs/procfs_vnops.c,v 1.46 2007/11/20 21:03:50 dillon Exp $ 41 */ 42 43 /* 44 * procfs vnode interface 45 */ 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/time.h> 50 #include <sys/kernel.h> 51 #include <sys/lock.h> 52 #include <sys/fcntl.h> 53 #include <sys/proc.h> 54 #include <sys/signalvar.h> 55 #include <sys/vnode.h> 56 #include <sys/uio.h> 57 #include <sys/mount.h> 58 #include <sys/namei.h> 59 #include <sys/dirent.h> 60 #include <sys/malloc.h> 61 #include <sys/reg.h> 62 #include <vm/vm_zone.h> 63 #include <vfs/procfs/procfs.h> 64 #include <sys/pioctl.h> 65 66 #include <machine/limits.h> 67 68 static int procfs_access (struct vop_access_args *); 69 static int procfs_badop (struct vop_generic_args *); 70 static int procfs_bmap (struct vop_bmap_args *); 71 static int procfs_close (struct vop_close_args *); 72 static int procfs_getattr (struct vop_getattr_args *); 73 static int procfs_inactive (struct vop_inactive_args *); 74 static int procfs_ioctl (struct vop_ioctl_args *); 75 static int procfs_lookup (struct vop_old_lookup_args *); 76 static int procfs_open (struct vop_open_args *); 77 static int procfs_print (struct vop_print_args *); 78 static int procfs_readdir (struct vop_readdir_args *); 79 static int procfs_readlink (struct vop_readlink_args *); 80 static int procfs_reclaim (struct vop_reclaim_args *); 81 static int procfs_setattr (struct vop_setattr_args *); 82 83 static int procfs_readdir_proc(struct vop_readdir_args *); 84 static int procfs_readdir_root(struct vop_readdir_args *); 85 86 /* 87 * procfs vnode operations. 88 */ 89 struct vop_ops procfs_vnode_vops = { 90 .vop_default = vop_defaultop, 91 .vop_access = procfs_access, 92 .vop_advlock = (void *)procfs_badop, 93 .vop_bmap = procfs_bmap, 94 .vop_close = procfs_close, 95 .vop_old_create = (void *)procfs_badop, 96 .vop_getattr = procfs_getattr, 97 .vop_inactive = procfs_inactive, 98 .vop_old_link = (void *)procfs_badop, 99 .vop_old_lookup = procfs_lookup, 100 .vop_old_mkdir = (void *)procfs_badop, 101 .vop_old_mknod = (void *)procfs_badop, 102 .vop_open = procfs_open, 103 .vop_pathconf = vop_stdpathconf, 104 .vop_print = procfs_print, 105 .vop_read = procfs_rw, 106 .vop_readdir = procfs_readdir, 107 .vop_readlink = procfs_readlink, 108 .vop_reclaim = procfs_reclaim, 109 .vop_old_remove = (void *)procfs_badop, 110 .vop_old_rename = (void *)procfs_badop, 111 .vop_old_rmdir = (void *)procfs_badop, 112 .vop_setattr = procfs_setattr, 113 .vop_old_symlink = (void *)procfs_badop, 114 .vop_write = (void *)procfs_rw, 115 .vop_ioctl = procfs_ioctl 116 }; 117 118 119 /* 120 * This is a list of the valid names in the 121 * process-specific sub-directories. It is 122 * used in procfs_lookup and procfs_readdir 123 */ 124 static struct proc_target { 125 u_char pt_type; 126 u_char pt_namlen; 127 char *pt_name; 128 pfstype pt_pfstype; 129 int (*pt_valid) (struct lwp *p); 130 } proc_targets[] = { 131 #define N(s) sizeof(s)-1, s 132 /* name type validp */ 133 { DT_DIR, N("."), Pproc, NULL }, 134 { DT_DIR, N(".."), Proot, NULL }, 135 { DT_REG, N("mem"), Pmem, NULL }, 136 { DT_REG, N("regs"), Pregs, procfs_validregs }, 137 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 138 { DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs }, 139 { DT_REG, N("ctl"), Pctl, NULL }, 140 { DT_REG, N("status"), Pstatus, NULL }, 141 { DT_REG, N("note"), Pnote, NULL }, 142 { DT_REG, N("notepg"), Pnotepg, NULL }, 143 { DT_REG, N("map"), Pmap, procfs_validmap }, 144 { DT_REG, N("etype"), Ptype, procfs_validtype }, 145 { DT_REG, N("cmdline"), Pcmdline, NULL }, 146 { DT_REG, N("rlimit"), Prlimit, NULL }, 147 { DT_LNK, N("file"), Pfile, NULL }, 148 #undef N 149 }; 150 static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 151 152 static pid_t atopid (const char *, u_int); 153 154 /* 155 * set things up for doing i/o on 156 * the pfsnode (vp). (vp) is locked 157 * on entry, and should be left locked 158 * on exit. 159 * 160 * for procfs we don't need to do anything 161 * in particular for i/o. all that is done 162 * is to support exclusive open on process 163 * memory images. 164 * 165 * procfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred, 166 * struct file *a_fp) 167 */ 168 static int 169 procfs_open(struct vop_open_args *ap) 170 { 171 struct pfsnode *pfs = VTOPFS(ap->a_vp); 172 struct proc *p1, *p2; 173 174 p2 = PFIND(pfs->pfs_pid); 175 if (p2 == NULL) 176 return (ENOENT); 177 if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred)) 178 return (ENOENT); 179 180 switch (pfs->pfs_type) { 181 case Pmem: 182 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 183 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) 184 return (EBUSY); 185 186 p1 = curproc; 187 KKASSERT(p1); 188 /* Can't trace a process that's currently exec'ing. */ 189 if ((p2->p_flag & P_INEXEC) != 0) 190 return EAGAIN; 191 if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred)) 192 return (EPERM); 193 194 if (ap->a_mode & FWRITE) 195 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 196 197 break; 198 199 default: 200 break; 201 } 202 203 return (vop_stdopen(ap)); 204 } 205 206 /* 207 * close the pfsnode (vp) after doing i/o. 208 * (vp) is not locked on entry or exit. 209 * 210 * nothing to do for procfs other than undo 211 * any exclusive open flag (see _open above). 212 * 213 * procfs_close(struct vnode *a_vp, int a_fflag, struct ucred *a_cred) 214 */ 215 static int 216 procfs_close(struct vop_close_args *ap) 217 { 218 struct pfsnode *pfs = VTOPFS(ap->a_vp); 219 struct proc *p; 220 221 switch (pfs->pfs_type) { 222 case Pmem: 223 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 224 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 225 /* 226 * v_opencount determines the last real close on the vnode. 227 * 228 * If this is the last close, then it checks to see if 229 * the target process has PF_LINGER set in p_pfsflags, 230 * if this is *not* the case, then the process' stop flags 231 * are cleared, and the process is woken up. This is 232 * to help prevent the case where a process has been 233 * told to stop on an event, but then the requesting process 234 * has gone away or forgotten about it. 235 */ 236 if ((ap->a_vp->v_opencount < 2) 237 && (p = pfind(pfs->pfs_pid)) 238 && !(p->p_pfsflags & PF_LINGER)) { 239 p->p_stops = 0; 240 p->p_step = 0; 241 wakeup(&p->p_step); 242 } 243 break; 244 default: 245 break; 246 } 247 248 return (vop_stdclose(ap)); 249 } 250 251 /* 252 * do an ioctl operation on a pfsnode (vp). 253 * (vp) is not locked on entry or exit. 254 */ 255 static int 256 procfs_ioctl(struct vop_ioctl_args *ap) 257 { 258 struct pfsnode *pfs = VTOPFS(ap->a_vp); 259 struct proc *procp; 260 struct proc *p; 261 int error; 262 int signo; 263 struct procfs_status *psp; 264 unsigned char flags; 265 266 procp = pfind(pfs->pfs_pid); 267 if (procp == NULL) 268 return ENOTTY; 269 p = curproc; 270 if (p == NULL) 271 return EINVAL; 272 273 /* Can't trace a process that's currently exec'ing. */ 274 if ((procp->p_flag & P_INEXEC) != 0) 275 return EAGAIN; 276 if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred)) 277 return EPERM; 278 279 switch (ap->a_command) { 280 case PIOCBIS: 281 procp->p_stops |= *(unsigned int*)ap->a_data; 282 break; 283 case PIOCBIC: 284 procp->p_stops &= ~*(unsigned int*)ap->a_data; 285 break; 286 case PIOCSFL: 287 /* 288 * NFLAGS is "non-suser_xxx flags" -- currently, only 289 * PFS_ISUGID ("ignore set u/g id"); 290 */ 291 #define NFLAGS (PF_ISUGID) 292 flags = (unsigned char)*(unsigned int*)ap->a_data; 293 if (flags & NFLAGS && (error = suser_cred(ap->a_cred, 0))) 294 return error; 295 procp->p_pfsflags = flags; 296 break; 297 case PIOCGFL: 298 *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags; 299 break; 300 case PIOCSTATUS: 301 psp = (struct procfs_status *)ap->a_data; 302 psp->state = (procp->p_step == 0); 303 psp->flags = procp->p_pfsflags; 304 psp->events = procp->p_stops; 305 if (procp->p_step) { 306 psp->why = procp->p_stype; 307 psp->val = procp->p_xstat; 308 } else { 309 psp->why = psp->val = 0; /* Not defined values */ 310 } 311 break; 312 case PIOCWAIT: 313 psp = (struct procfs_status *)ap->a_data; 314 if (procp->p_step == 0) { 315 error = tsleep(&procp->p_stype, PCATCH, "piocwait", 0); 316 if (error) 317 return error; 318 } 319 psp->state = 1; /* It stopped */ 320 psp->flags = procp->p_pfsflags; 321 psp->events = procp->p_stops; 322 psp->why = procp->p_stype; /* why it stopped */ 323 psp->val = procp->p_xstat; /* any extra info */ 324 break; 325 case PIOCCONT: /* Restart a proc */ 326 if (procp->p_step == 0) 327 return EINVAL; /* Can only start a stopped process */ 328 if ((signo = *(int*)ap->a_data) != 0) { 329 if (signo >= NSIG || signo <= 0) 330 return EINVAL; 331 ksignal(procp, signo); 332 } 333 procp->p_step = 0; 334 wakeup(&procp->p_step); 335 break; 336 default: 337 return (ENOTTY); 338 } 339 return 0; 340 } 341 342 /* 343 * do block mapping for pfsnode (vp). 344 * since we don't use the buffer cache 345 * for procfs this function should never 346 * be called. in any case, it's not clear 347 * what part of the kernel ever makes use 348 * of this function. for sanity, this is the 349 * usual no-op bmap, although returning 350 * (EIO) would be a reasonable alternative. 351 * 352 * XXX mmap assumes buffer cache operation 353 * 354 * procfs_bmap(struct vnode *a_vp, off_t a_loffset, 355 * off_t *a_doffsetp, int *a_runp, int *a_runb) 356 */ 357 static int 358 procfs_bmap(struct vop_bmap_args *ap) 359 { 360 if (ap->a_doffsetp != NULL) 361 *ap->a_doffsetp = ap->a_loffset; 362 if (ap->a_runp != NULL) 363 *ap->a_runp = 0; 364 if (ap->a_runb != NULL) 365 *ap->a_runb = 0; 366 return (0); 367 } 368 369 /* 370 * procfs_inactive is called when the pfsnode 371 * is vrele'd and the reference count goes 372 * to zero. (vp) will be on the vnode free 373 * list, so to get it back vget() must be 374 * used. 375 * 376 * (vp) is locked on entry, but must be unlocked on exit. 377 * 378 * procfs_inactive(struct vnode *a_vp) 379 */ 380 static int 381 procfs_inactive(struct vop_inactive_args *ap) 382 { 383 /*struct vnode *vp = ap->a_vp;*/ 384 385 return (0); 386 } 387 388 /* 389 * _reclaim is called when getnewvnode() 390 * wants to make use of an entry on the vnode 391 * free list. at this time the filesystem needs 392 * to free any private data and remove the node 393 * from any private lists. 394 * 395 * procfs_reclaim(struct vnode *a_vp) 396 */ 397 static int 398 procfs_reclaim(struct vop_reclaim_args *ap) 399 { 400 return (procfs_freevp(ap->a_vp)); 401 } 402 403 /* 404 * _print is used for debugging. 405 * just print a readable description 406 * of (vp). 407 * 408 * procfs_print(struct vnode *a_vp) 409 */ 410 static int 411 procfs_print(struct vop_print_args *ap) 412 { 413 struct pfsnode *pfs = VTOPFS(ap->a_vp); 414 415 kprintf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n", 416 pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 417 return (0); 418 } 419 420 /* 421 * generic entry point for unsupported operations 422 */ 423 static int 424 procfs_badop(struct vop_generic_args *ap) 425 { 426 return (EIO); 427 } 428 429 /* 430 * Invent attributes for pfsnode (vp) and store 431 * them in (vap). 432 * Directories lengths are returned as zero since 433 * any real length would require the genuine size 434 * to be computed, and nothing cares anyway. 435 * 436 * this is relatively minimal for procfs. 437 * 438 * procfs_getattr(struct vnode *a_vp, struct vattr *a_vap) 439 */ 440 static int 441 procfs_getattr(struct vop_getattr_args *ap) 442 { 443 struct pfsnode *pfs = VTOPFS(ap->a_vp); 444 struct vattr *vap = ap->a_vap; 445 struct proc *procp; 446 int error; 447 448 /* 449 * First make sure that the process and its credentials 450 * still exist. 451 */ 452 switch (pfs->pfs_type) { 453 case Proot: 454 case Pcurproc: 455 procp = 0; 456 break; 457 458 default: 459 procp = PFIND(pfs->pfs_pid); 460 if (procp == NULL || procp->p_ucred == NULL) 461 return (ENOENT); 462 } 463 464 error = 0; 465 466 /* start by zeroing out the attributes */ 467 VATTR_NULL(vap); 468 469 /* next do all the common fields */ 470 vap->va_type = ap->a_vp->v_type; 471 vap->va_mode = pfs->pfs_mode; 472 vap->va_fileid = pfs->pfs_fileno; 473 vap->va_flags = 0; 474 vap->va_blocksize = PAGE_SIZE; 475 vap->va_bytes = vap->va_size = 0; 476 vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; 477 478 /* 479 * Make all times be current TOD. 480 * It would be possible to get the process start 481 * time from the p_stat structure, but there's 482 * no "file creation" time stamp anyway, and the 483 * p_stat structure is not addressible if u. gets 484 * swapped out for that process. 485 */ 486 nanotime(&vap->va_ctime); 487 vap->va_atime = vap->va_mtime = vap->va_ctime; 488 489 /* 490 * If the process has exercised some setuid or setgid 491 * privilege, then rip away read/write permission so 492 * that only root can gain access. 493 */ 494 switch (pfs->pfs_type) { 495 case Pctl: 496 case Pregs: 497 case Pfpregs: 498 case Pdbregs: 499 case Pmem: 500 if (procp->p_flag & P_SUGID) 501 vap->va_mode &= ~((VREAD|VWRITE)| 502 ((VREAD|VWRITE)>>3)| 503 ((VREAD|VWRITE)>>6)); 504 break; 505 default: 506 break; 507 } 508 509 /* 510 * now do the object specific fields 511 * 512 * The size could be set from struct reg, but it's hardly 513 * worth the trouble, and it puts some (potentially) machine 514 * dependent data into this machine-independent code. If it 515 * becomes important then this function should break out into 516 * a per-file stat function in the corresponding .c file. 517 */ 518 519 vap->va_nlink = 1; 520 if (procp) { 521 vap->va_uid = procp->p_ucred->cr_uid; 522 vap->va_gid = procp->p_ucred->cr_gid; 523 } 524 525 switch (pfs->pfs_type) { 526 case Proot: 527 /* 528 * Set nlink to 1 to tell fts(3) we don't actually know. 529 */ 530 vap->va_nlink = 1; 531 vap->va_uid = 0; 532 vap->va_gid = 0; 533 vap->va_size = vap->va_bytes = DEV_BSIZE; 534 break; 535 536 case Pcurproc: { 537 char buf[16]; /* should be enough */ 538 vap->va_uid = 0; 539 vap->va_gid = 0; 540 vap->va_size = vap->va_bytes = 541 ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid); 542 break; 543 } 544 545 case Pproc: 546 vap->va_nlink = nproc_targets; 547 vap->va_size = vap->va_bytes = DEV_BSIZE; 548 break; 549 550 case Pfile: { 551 char *fullpath, *freepath; 552 error = vn_fullpath(procp, NULL, &fullpath, &freepath); 553 if (error == 0) { 554 vap->va_size = strlen(fullpath); 555 kfree(freepath, M_TEMP); 556 } else { 557 vap->va_size = sizeof("unknown") - 1; 558 error = 0; 559 } 560 vap->va_bytes = vap->va_size; 561 break; 562 } 563 564 case Pmem: 565 /* 566 * If we denied owner access earlier, then we have to 567 * change the owner to root - otherwise 'ps' and friends 568 * will break even though they are setgid kmem. *SIGH* 569 */ 570 if (procp->p_flag & P_SUGID) 571 vap->va_uid = 0; 572 else 573 vap->va_uid = procp->p_ucred->cr_uid; 574 break; 575 576 case Pregs: 577 vap->va_bytes = vap->va_size = sizeof(struct reg); 578 break; 579 580 case Pfpregs: 581 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 582 break; 583 584 case Pdbregs: 585 vap->va_bytes = vap->va_size = sizeof(struct dbreg); 586 break; 587 588 case Ptype: 589 case Pmap: 590 case Pctl: 591 case Pstatus: 592 case Pnote: 593 case Pnotepg: 594 case Pcmdline: 595 case Prlimit: 596 break; 597 598 default: 599 panic("procfs_getattr"); 600 } 601 602 return (error); 603 } 604 605 /* 606 * procfs_setattr(struct vnode *a_vp, struct vattr *a_vap, 607 * struct ucred *a_cred) 608 */ 609 static int 610 procfs_setattr(struct vop_setattr_args *ap) 611 { 612 if (ap->a_vap->va_flags != VNOVAL) 613 return (EOPNOTSUPP); 614 615 /* 616 * just fake out attribute setting 617 * it's not good to generate an error 618 * return, otherwise things like creat() 619 * will fail when they try to set the 620 * file length to 0. worse, this means 621 * that echo $note > /proc/$pid/note will fail. 622 */ 623 624 return (0); 625 } 626 627 /* 628 * implement access checking. 629 * 630 * something very similar to this code is duplicated 631 * throughout the 4bsd kernel and should be moved 632 * into kern/vfs_subr.c sometime. 633 * 634 * actually, the check for super-user is slightly 635 * broken since it will allow read access to write-only 636 * objects. this doesn't cause any particular trouble 637 * but does mean that the i/o entry points need to check 638 * that the operation really does make sense. 639 * 640 * procfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred) 641 */ 642 static int 643 procfs_access(struct vop_access_args *ap) 644 { 645 struct vattr *vap; 646 struct vattr vattr; 647 int error; 648 649 /* 650 * If you're the super-user, 651 * you always get access. 652 */ 653 if (ap->a_cred->cr_uid == 0) 654 return (0); 655 656 vap = &vattr; 657 error = VOP_GETATTR(ap->a_vp, vap); 658 if (error) 659 return (error); 660 661 /* 662 * Access check is based on only one of owner, group, public. 663 * If not owner, then check group. If not a member of the 664 * group, then check public access. 665 */ 666 if (ap->a_cred->cr_uid != vap->va_uid) { 667 gid_t *gp; 668 int i; 669 670 ap->a_mode >>= 3; 671 gp = ap->a_cred->cr_groups; 672 for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++) 673 if (vap->va_gid == *gp) 674 goto found; 675 ap->a_mode >>= 3; 676 found: 677 ; 678 } 679 680 if ((vap->va_mode & ap->a_mode) == ap->a_mode) 681 return (0); 682 683 return (EACCES); 684 } 685 686 /* 687 * lookup. this is incredibly complicated in the general case, however 688 * for most pseudo-filesystems very little needs to be done. 689 * 690 * procfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp, 691 * struct componentname *a_cnp) 692 */ 693 static int 694 procfs_lookup(struct vop_old_lookup_args *ap) 695 { 696 struct componentname *cnp = ap->a_cnp; 697 struct vnode **vpp = ap->a_vpp; 698 struct vnode *dvp = ap->a_dvp; 699 char *pname = cnp->cn_nameptr; 700 /* struct proc *curp = cnp->cn_proc; */ 701 struct proc_target *pt; 702 pid_t pid; 703 struct pfsnode *pfs; 704 struct proc *p; 705 struct lwp *lp; 706 int i; 707 int error; 708 709 *vpp = NULL; 710 711 if (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME) 712 return (EROFS); 713 714 error = 0; 715 if (cnp->cn_namelen == 1 && *pname == '.') { 716 *vpp = dvp; 717 vref(*vpp); 718 goto out; 719 } 720 721 pfs = VTOPFS(dvp); 722 switch (pfs->pfs_type) { 723 case Proot: 724 if (cnp->cn_flags & CNP_ISDOTDOT) 725 return (EIO); 726 727 if (CNEQ(cnp, "curproc", 7)) { 728 error = procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc); 729 goto out; 730 } 731 732 pid = atopid(pname, cnp->cn_namelen); 733 if (pid == NO_PID) 734 break; 735 736 p = PFIND(pid); 737 if (p == NULL) 738 break; 739 740 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred)) 741 break; 742 743 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 && 744 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid) 745 break; 746 747 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc); 748 goto out; 749 750 case Pproc: 751 if (cnp->cn_flags & CNP_ISDOTDOT) { 752 error = procfs_root(dvp->v_mount, vpp); 753 goto out; 754 } 755 756 p = PFIND(pfs->pfs_pid); 757 if (p == NULL) 758 break; 759 /* XXX lwp */ 760 lp = FIRST_LWP_IN_PROC(p); 761 762 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred)) 763 break; 764 765 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 && 766 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid) 767 break; 768 769 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 770 if (cnp->cn_namelen == pt->pt_namlen && 771 bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 772 (pt->pt_valid == NULL || (*pt->pt_valid)(lp))) 773 goto found; 774 } 775 break; 776 found: 777 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 778 pt->pt_pfstype); 779 goto out; 780 781 default: 782 error = ENOTDIR; 783 goto out; 784 } 785 if (cnp->cn_nameiop == NAMEI_LOOKUP) 786 error = ENOENT; 787 else 788 error = EROFS; 789 /* 790 * If no error occured *vpp will hold a referenced locked vnode. 791 * dvp was passed to us locked and *vpp must be returned locked. 792 * If *vpp != dvp then we should unlock dvp if (1) this is not the 793 * last component or (2) CNP_LOCKPARENT is not set. 794 */ 795 out: 796 if (error == 0 && *vpp != dvp) { 797 if ((cnp->cn_flags & CNP_LOCKPARENT) == 0) { 798 cnp->cn_flags |= CNP_PDIRUNLOCK; 799 vn_unlock(dvp); 800 } 801 } 802 return (error); 803 } 804 805 /* 806 * Does this process have a text file? 807 */ 808 int 809 procfs_validfile(struct lwp *lp) 810 { 811 return (procfs_findtextvp(lp->lwp_proc) != NULLVP); 812 } 813 814 /* 815 * readdir() returns directory entries from pfsnode (vp). 816 * 817 * We generate just one directory entry at a time, as it would probably 818 * not pay off to buffer several entries locally to save uiomove calls. 819 * 820 * procfs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred, 821 * int *a_eofflag, int *a_ncookies, off_t **a_cookies) 822 */ 823 static int 824 procfs_readdir(struct vop_readdir_args *ap) 825 { 826 struct pfsnode *pfs; 827 int error; 828 829 if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX) 830 return (EINVAL); 831 if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0) 832 return (error); 833 pfs = VTOPFS(ap->a_vp); 834 835 switch (pfs->pfs_type) { 836 case Pproc: 837 /* 838 * this is for the process-specific sub-directories. 839 * all that is needed to is copy out all the entries 840 * from the procent[] table (top of this file). 841 */ 842 error = procfs_readdir_proc(ap); 843 break; 844 case Proot: 845 /* 846 * this is for the root of the procfs filesystem 847 * what is needed is a special entry for "curproc" 848 * followed by an entry for each process on allproc 849 */ 850 error = procfs_readdir_root(ap); 851 break; 852 default: 853 error = ENOTDIR; 854 break; 855 } 856 857 vn_unlock(ap->a_vp); 858 return (error); 859 } 860 861 static int 862 procfs_readdir_proc(struct vop_readdir_args *ap) 863 { 864 struct pfsnode *pfs; 865 int error, i, retval; 866 struct proc *p; 867 struct lwp *lp; 868 struct proc_target *pt; 869 struct uio *uio = ap->a_uio; 870 871 pfs = VTOPFS(ap->a_vp); 872 p = PFIND(pfs->pfs_pid); 873 if (p == NULL) 874 return(0); 875 if (!PRISON_CHECK(ap->a_cred, p->p_ucred)) 876 return(0); 877 /* XXX lwp */ 878 lp = FIRST_LWP_IN_PROC(p); 879 880 error = 0; 881 i = (int)uio->uio_offset; 882 if (i < 0) 883 return (EINVAL); 884 885 for (pt = &proc_targets[i]; 886 !error && uio->uio_resid > 0 && i < nproc_targets; pt++, i++) { 887 if (pt->pt_valid && (*pt->pt_valid)(lp) == 0) 888 continue; 889 890 retval = vop_write_dirent(&error, uio, 891 PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype), pt->pt_type, 892 pt->pt_namlen, pt->pt_name); 893 if (retval) 894 break; 895 } 896 897 uio->uio_offset = (off_t)i; 898 899 return(0); 900 } 901 902 struct procfs_readdir_root_info { 903 int error; 904 int i; 905 int pcnt; 906 struct uio *uio; 907 struct ucred *cred; 908 }; 909 910 static int procfs_readdir_root_callback(struct proc *p, void *data); 911 912 static int 913 procfs_readdir_root(struct vop_readdir_args *ap) 914 { 915 struct procfs_readdir_root_info info; 916 struct uio *uio = ap->a_uio; 917 int res; 918 919 info.error = 0; 920 info.i = (int)uio->uio_offset; 921 922 if (info.i < 0) 923 return (EINVAL); 924 925 info.pcnt = 0; 926 info.uio = uio; 927 info.cred = ap->a_cred; 928 while (info.pcnt < 3) { 929 res = procfs_readdir_root_callback(NULL, &info); 930 if (res < 0) 931 break; 932 } 933 if (res >= 0) 934 allproc_scan(procfs_readdir_root_callback, &info); 935 uio->uio_offset = (off_t)info.i; 936 937 return (info.error); 938 } 939 940 static int 941 procfs_readdir_root_callback(struct proc *p, void *data) 942 { 943 struct procfs_readdir_root_info *info = data; 944 struct uio *uio; 945 int retval; 946 ino_t d_ino; 947 const char *d_name; 948 char d_name_pid[20]; 949 size_t d_namlen; 950 uint8_t d_type; 951 952 uio = info->uio; 953 954 if (uio->uio_resid <= 0 || info->error) 955 return(-1); 956 957 switch (info->pcnt) { 958 case 0: /* `.' */ 959 d_ino = PROCFS_FILENO(0, Proot); 960 d_name = "."; 961 d_namlen = 1; 962 d_type = DT_DIR; 963 break; 964 case 1: /* `..' */ 965 d_ino = PROCFS_FILENO(0, Proot); 966 d_name = ".."; 967 d_namlen = 2; 968 d_type = DT_DIR; 969 break; 970 971 case 2: 972 d_ino = PROCFS_FILENO(0, Pcurproc); 973 d_namlen = 7; 974 d_name = "curproc"; 975 d_type = DT_LNK; 976 break; 977 978 979 default: 980 if (!PRISON_CHECK(info->cred, p->p_ucred)) 981 return(0); 982 if (ps_showallprocs == 0 && 983 info->cred->cr_uid != 0 && 984 info->cred->cr_uid != p->p_ucred->cr_uid) { 985 return(0); 986 } 987 988 /* 989 * Skip entries we have already returned (optimization) 990 */ 991 if (info->pcnt < info->i) { 992 ++info->pcnt; 993 return(0); 994 } 995 996 d_ino = PROCFS_FILENO(p->p_pid, Pproc); 997 d_namlen = ksnprintf(d_name_pid, sizeof(d_name_pid), 998 "%ld", (long)p->p_pid); 999 d_name = d_name_pid; 1000 d_type = DT_DIR; 1001 break; 1002 } 1003 1004 /* 1005 * Skip entries we have already returned (optimization) 1006 */ 1007 if (info->pcnt < info->i) { 1008 ++info->pcnt; 1009 return(0); 1010 } 1011 1012 retval = vop_write_dirent(&info->error, uio, 1013 d_ino, d_type, d_namlen, d_name); 1014 if (retval) 1015 return(-1); 1016 ++info->pcnt; 1017 ++info->i; 1018 return(0); 1019 } 1020 1021 /* 1022 * readlink reads the link of `curproc' or `file' 1023 */ 1024 static int 1025 procfs_readlink(struct vop_readlink_args *ap) 1026 { 1027 char buf[16]; /* should be enough */ 1028 struct proc *procp; 1029 struct vnode *vp = ap->a_vp; 1030 struct pfsnode *pfs = VTOPFS(vp); 1031 char *fullpath, *freepath; 1032 int error, len; 1033 1034 switch (pfs->pfs_type) { 1035 case Pcurproc: 1036 if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc)) 1037 return (EINVAL); 1038 1039 len = ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid); 1040 1041 return (uiomove(buf, len, ap->a_uio)); 1042 /* 1043 * There _should_ be no way for an entire process to disappear 1044 * from under us... 1045 */ 1046 case Pfile: 1047 procp = PFIND(pfs->pfs_pid); 1048 if (procp == NULL || procp->p_ucred == NULL) { 1049 kprintf("procfs_readlink: pid %d disappeared\n", 1050 pfs->pfs_pid); 1051 return (uiomove("unknown", sizeof("unknown") - 1, 1052 ap->a_uio)); 1053 } 1054 error = vn_fullpath(procp, NULL, &fullpath, &freepath); 1055 if (error != 0) 1056 return (uiomove("unknown", sizeof("unknown") - 1, 1057 ap->a_uio)); 1058 error = uiomove(fullpath, strlen(fullpath), ap->a_uio); 1059 kfree(freepath, M_TEMP); 1060 return (error); 1061 default: 1062 return (EINVAL); 1063 } 1064 } 1065 1066 /* 1067 * convert decimal ascii to pid_t 1068 */ 1069 static pid_t 1070 atopid(const char *b, u_int len) 1071 { 1072 pid_t p = 0; 1073 1074 while (len--) { 1075 char c = *b++; 1076 if (c < '0' || c > '9') 1077 return (NO_PID); 1078 p = 10 * p + (c - '0'); 1079 if (p > PID_MAX) 1080 return (NO_PID); 1081 } 1082 1083 return (p); 1084 } 1085 1086