1 /* 2 * Copyright (c) 1993 Jan-Simon Pendry 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * %sccs.include.redist.c% 10 * 11 * @(#)procfs_vnops.c 8.4 (Berkeley) 01/21/94 12 * 13 * From: 14 * $Id: procfs_vnops.c,v 3.2 1993/12/15 09:40:17 jsp Exp $ 15 */ 16 17 /* 18 * procfs vnode interface 19 */ 20 21 #include <sys/param.h> 22 #include <sys/systm.h> 23 #include <sys/time.h> 24 #include <sys/kernel.h> 25 #include <sys/file.h> 26 #include <sys/proc.h> 27 #include <sys/vnode.h> 28 #include <sys/namei.h> 29 #include <sys/malloc.h> 30 #include <sys/dirent.h> 31 #include <sys/resourcevar.h> 32 #include <miscfs/procfs/procfs.h> 33 #include <vm/vm.h> /* for PAGE_SIZE */ 34 35 /* 36 * Vnode Operations. 37 * 38 */ 39 40 /* 41 * This is a list of the valid names in the 42 * process-specific sub-directories. It is 43 * used in procfs_lookup and procfs_readdir 44 */ 45 static struct pfsnames { 46 u_short d_namlen; 47 char d_name[PROCFS_NAMELEN]; 48 pfstype d_pfstype; 49 } procent[] = { 50 #define N(s) sizeof(s)-1, s 51 /* namlen, nam, type */ 52 { N("file"), Pfile }, 53 { N("mem"), Pmem }, 54 { N("regs"), Pregs }, 55 { N("ctl"), Pctl }, 56 { N("status"), Pstatus }, 57 { N("note"), Pnote }, 58 { N("notepg"), Pnotepg }, 59 #undef N 60 }; 61 #define Nprocent (sizeof(procent)/sizeof(procent[0])) 62 63 static pid_t atopid __P((const char *, u_int)); 64 65 /* 66 * set things up for doing i/o on 67 * the pfsnode (vp). (vp) is locked 68 * on entry, and should be left locked 69 * on exit. 70 * 71 * for procfs we don't need to do anything 72 * in particular for i/o. all that is done 73 * is to support exclusive open on process 74 * memory images. 75 */ 76 procfs_open(ap) 77 struct vop_open_args *ap; 78 { 79 struct pfsnode *pfs = VTOPFS(ap->a_vp); 80 81 switch (pfs->pfs_type) { 82 case Pmem: 83 if (PFIND(pfs->pfs_pid) == 0) 84 return (ENOENT); /* was ESRCH, jsp */ 85 86 if ((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL) || 87 (pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)) 88 return (EBUSY); 89 90 91 if (ap->a_mode & FWRITE) 92 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 93 94 return (0); 95 96 default: 97 break; 98 } 99 100 return (0); 101 } 102 103 /* 104 * close the pfsnode (vp) after doing i/o. 105 * (vp) is not locked on entry or exit. 106 * 107 * nothing to do for procfs other than undo 108 * any exclusive open flag (see _open above). 109 */ 110 procfs_close(ap) 111 struct vop_close_args *ap; 112 { 113 struct pfsnode *pfs = VTOPFS(ap->a_vp); 114 115 switch (pfs->pfs_type) { 116 case Pmem: 117 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 118 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 119 break; 120 } 121 122 return (0); 123 } 124 125 /* 126 * do an ioctl operation on pfsnode (vp). 127 * (vp) is not locked on entry or exit. 128 */ 129 procfs_ioctl(ap) 130 struct vop_ioctl_args *ap; 131 { 132 133 return (ENOTTY); 134 } 135 136 /* 137 * do block mapping for pfsnode (vp). 138 * since we don't use the buffer cache 139 * for procfs this function should never 140 * be called. in any case, it's not clear 141 * what part of the kernel ever makes use 142 * of this function. for sanity, this is the 143 * usual no-op bmap, although returning 144 * (EIO) would be a reasonable alternative. 145 */ 146 procfs_bmap(ap) 147 struct vop_bmap_args *ap; 148 { 149 150 if (ap->a_vpp != NULL) 151 *ap->a_vpp = ap->a_vp; 152 if (ap->a_bnp != NULL) 153 *ap->a_bnp = ap->a_bn; 154 return (0); 155 } 156 157 /* 158 * _inactive is called when the pfsnode 159 * is vrele'd and the reference count goes 160 * to zero. (vp) will be on the vnode free 161 * list, so to get it back vget() must be 162 * used. 163 * 164 * for procfs, check if the process is still 165 * alive and if it isn't then just throw away 166 * the vnode by calling vgone(). this may 167 * be overkill and a waste of time since the 168 * chances are that the process will still be 169 * there and PFIND is not free. 170 * 171 * (vp) is not locked on entry or exit. 172 */ 173 procfs_inactive(ap) 174 struct vop_inactive_args *ap; 175 { 176 struct pfsnode *pfs = VTOPFS(ap->a_vp); 177 178 if (PFIND(pfs->pfs_pid) == 0) 179 vgone(ap->a_vp); 180 181 return (0); 182 } 183 184 /* 185 * _reclaim is called when getnewvnode() 186 * wants to make use of an entry on the vnode 187 * free list. at this time the filesystem needs 188 * to free any private data and remove the node 189 * from any private lists. 190 */ 191 procfs_reclaim(ap) 192 struct vop_reclaim_args *ap; 193 { 194 int error; 195 196 error = procfs_freevp(ap->a_vp); 197 return (error); 198 } 199 200 /* 201 * Return POSIX pathconf information applicable to special devices. 202 */ 203 procfs_pathconf(ap) 204 struct vop_pathconf_args /* { 205 struct vnode *a_vp; 206 int a_name; 207 int *a_retval; 208 } */ *ap; 209 { 210 211 switch (ap->a_name) { 212 case _PC_LINK_MAX: 213 *ap->a_retval = LINK_MAX; 214 return (0); 215 case _PC_MAX_CANON: 216 *ap->a_retval = MAX_CANON; 217 return (0); 218 case _PC_MAX_INPUT: 219 *ap->a_retval = MAX_INPUT; 220 return (0); 221 case _PC_PIPE_BUF: 222 *ap->a_retval = PIPE_BUF; 223 return (0); 224 case _PC_CHOWN_RESTRICTED: 225 *ap->a_retval = 1; 226 return (0); 227 case _PC_VDISABLE: 228 *ap->a_retval = _POSIX_VDISABLE; 229 return (0); 230 default: 231 return (EINVAL); 232 } 233 /* NOTREACHED */ 234 } 235 236 /* 237 * _print is used for debugging. 238 * just print a readable description 239 * of (vp). 240 */ 241 procfs_print(ap) 242 struct vop_print_args *ap; 243 { 244 struct pfsnode *pfs = VTOPFS(ap->a_vp); 245 246 printf("tag VT_PROCFS, pid %d, mode %x, flags %x\n", 247 pfs->pfs_pid, 248 pfs->pfs_mode, pfs->pfs_flags); 249 } 250 251 /* 252 * _abortop is called when operations such as 253 * rename and create fail. this entry is responsible 254 * for undoing any side-effects caused by the lookup. 255 * this will always include freeing the pathname buffer. 256 */ 257 procfs_abortop(ap) 258 struct vop_abortop_args *ap; 259 { 260 261 if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) 262 FREE(ap->a_cnp->cn_pnbuf, M_NAMEI); 263 return (0); 264 } 265 266 /* 267 * generic entry point for unsupported operations 268 */ 269 procfs_badop() 270 { 271 272 return (EIO); 273 } 274 275 /* 276 * Invent attributes for pfsnode (vp) and store 277 * them in (vap). 278 * Directories lengths are returned as zero since 279 * any real length would require the genuine size 280 * to be computed, and nothing cares anyway. 281 * 282 * this is relatively minimal for procfs. 283 */ 284 procfs_getattr(ap) 285 struct vop_getattr_args *ap; 286 { 287 struct pfsnode *pfs = VTOPFS(ap->a_vp); 288 struct vattr *vap = ap->a_vap; 289 struct proc *procp; 290 int error; 291 292 /* first check the process still exists */ 293 procp = PFIND(pfs->pfs_pid); 294 if (procp == 0) 295 return (ENOENT); 296 297 error = 0; 298 299 /* start by zeroing out the attributes */ 300 VATTR_NULL(vap); 301 302 /* next do all the common fields */ 303 vap->va_type = ap->a_vp->v_type; 304 vap->va_mode = pfs->pfs_mode; 305 vap->va_fileid = pfs->pfs_fileno; 306 vap->va_flags = 0; 307 vap->va_blocksize = PAGE_SIZE; 308 vap->va_bytes = vap->va_size = 0; 309 310 /* 311 * Make all times be current TOD. 312 * It would be possible to get the process start 313 * time from the p_stat structure, but there's 314 * no "file creation" time stamp anyway, and the 315 * p_stat structure is not addressible if u. gets 316 * swapped out for that process. 317 */ 318 microtime(&vap->va_ctime); 319 vap->va_atime = vap->va_mtime = vap->va_ctime; 320 321 /* 322 * now do the object specific fields 323 * 324 * The size could be set from struct reg, but it's hardly 325 * worth the trouble, and it puts some (potentially) machine 326 * dependent data into this machine-independent code. If it 327 * becomes important then this function should break out into 328 * a per-file stat function in the corresponding .c file. 329 */ 330 331 switch (pfs->pfs_type) { 332 case Proot: 333 vap->va_nlink = 2; 334 vap->va_uid = 0; 335 vap->va_gid = 0; 336 break; 337 338 case Pproc: 339 vap->va_nlink = 2; 340 vap->va_uid = procp->p_ucred->cr_uid; 341 vap->va_gid = procp->p_ucred->cr_gid; 342 break; 343 344 case Pfile: 345 error = EOPNOTSUPP; 346 break; 347 348 case Pmem: 349 vap->va_nlink = 1; 350 vap->va_bytes = vap->va_size = 351 ctob(procp->p_vmspace->vm_tsize + 352 procp->p_vmspace->vm_dsize + 353 procp->p_vmspace->vm_ssize); 354 vap->va_uid = procp->p_ucred->cr_uid; 355 vap->va_gid = procp->p_ucred->cr_gid; 356 break; 357 358 case Pregs: 359 case Pctl: 360 case Pstatus: 361 case Pnote: 362 case Pnotepg: 363 vap->va_nlink = 1; 364 vap->va_uid = procp->p_ucred->cr_uid; 365 vap->va_gid = procp->p_ucred->cr_gid; 366 break; 367 368 default: 369 panic("procfs_getattr"); 370 } 371 372 return (error); 373 } 374 375 procfs_setattr(ap) 376 struct vop_setattr_args *ap; 377 { 378 /* 379 * just fake out attribute setting 380 * it's not good to generate an error 381 * return, otherwise things like creat() 382 * will fail when they try to set the 383 * file length to 0. worse, this means 384 * that echo $note > /proc/$pid/note will fail. 385 */ 386 387 return (0); 388 } 389 390 /* 391 * implement access checking. 392 * 393 * something very similar to this code is duplicated 394 * throughout the 4bsd kernel and should be moved 395 * into kern/vfs_subr.c sometime. 396 * 397 * actually, the check for super-user is slightly 398 * broken since it will allow read access to write-only 399 * objects. this doesn't cause any particular trouble 400 * but does mean that the i/o entry points need to check 401 * that the operation really does make sense. 402 */ 403 procfs_access(ap) 404 struct vop_access_args *ap; 405 { 406 struct vattr *vap; 407 struct vattr vattr; 408 int error; 409 410 /* 411 * If you're the super-user, 412 * you always get access. 413 */ 414 if (ap->a_cred->cr_uid == (uid_t) 0) 415 return (0); 416 vap = &vattr; 417 if (error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p)) 418 return (error); 419 420 /* 421 * Access check is based on only one of owner, group, public. 422 * If not owner, then check group. If not a member of the 423 * group, then check public access. 424 */ 425 if (ap->a_cred->cr_uid != vap->va_uid) { 426 gid_t *gp; 427 int i; 428 429 (ap->a_mode) >>= 3; 430 gp = ap->a_cred->cr_groups; 431 for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++) 432 if (vap->va_gid == *gp) 433 goto found; 434 ap->a_mode >>= 3; 435 found: 436 ; 437 } 438 439 if ((vap->va_mode & ap->a_mode) == ap->a_mode) 440 return (0); 441 442 return (EACCES); 443 } 444 445 /* 446 * lookup. this is incredibly complicated in the 447 * general case, however for most pseudo-filesystems 448 * very little needs to be done. 449 * 450 * unless you want to get a migraine, just make sure your 451 * filesystem doesn't do any locking of its own. otherwise 452 * read and inwardly digest ufs_lookup(). 453 */ 454 procfs_lookup(ap) 455 struct vop_lookup_args *ap; 456 { 457 struct componentname *cnp = ap->a_cnp; 458 struct vnode **vpp = ap->a_vpp; 459 struct vnode *dvp = ap->a_dvp; 460 char *pname = cnp->cn_nameptr; 461 int error = 0; 462 pid_t pid; 463 struct vnode *nvp; 464 struct pfsnode *pfs; 465 struct proc *procp; 466 pfstype pfs_type; 467 int i; 468 469 if (cnp->cn_namelen == 1 && *pname == '.') { 470 *vpp = dvp; 471 VREF(dvp); 472 /*VOP_LOCK(dvp);*/ 473 return (0); 474 } 475 476 *vpp = NULL; 477 478 pfs = VTOPFS(dvp); 479 switch (pfs->pfs_type) { 480 case Proot: 481 if (cnp->cn_flags & ISDOTDOT) 482 return (EIO); 483 484 if (CNEQ(cnp, "curproc", 7)) 485 pid = cnp->cn_proc->p_pid; 486 else 487 pid = atopid(pname, cnp->cn_namelen); 488 if (pid == NO_PID) 489 return (ENOENT); 490 491 procp = PFIND(pid); 492 if (procp == 0) 493 return (ENOENT); 494 495 error = procfs_allocvp(dvp->v_mount, &nvp, pid, Pproc); 496 if (error) 497 return (error); 498 499 nvp->v_type = VDIR; 500 pfs = VTOPFS(nvp); 501 502 *vpp = nvp; 503 return (0); 504 505 case Pproc: 506 if (cnp->cn_flags & ISDOTDOT) { 507 error = procfs_root(dvp->v_mount, vpp); 508 return (error); 509 } 510 511 procp = PFIND(pfs->pfs_pid); 512 if (procp == 0) 513 return (ENOENT); 514 515 for (i = 0; i < Nprocent; i++) { 516 struct pfsnames *dp = &procent[i]; 517 518 if (cnp->cn_namelen == dp->d_namlen && 519 bcmp(pname, dp->d_name, dp->d_namlen) == 0) { 520 pfs_type = dp->d_pfstype; 521 goto found; 522 } 523 } 524 return (ENOENT); 525 526 found: 527 if (pfs_type == Pfile) { 528 nvp = procfs_findtextvp(procp); 529 if (nvp) { 530 VREF(nvp); 531 VOP_LOCK(nvp); 532 } else { 533 error = ENXIO; 534 } 535 } else { 536 error = procfs_allocvp(dvp->v_mount, &nvp, 537 pfs->pfs_pid, pfs_type); 538 if (error) 539 return (error); 540 541 nvp->v_type = VREG; 542 pfs = VTOPFS(nvp); 543 } 544 *vpp = nvp; 545 return (error); 546 547 default: 548 return (ENOTDIR); 549 } 550 } 551 552 /* 553 * readdir returns directory entries from pfsnode (vp). 554 * 555 * the strategy here with procfs is to generate a single 556 * directory entry at a time (struct pfsdent) and then 557 * copy that out to userland using uiomove. a more efficent 558 * though more complex implementation, would try to minimize 559 * the number of calls to uiomove(). for procfs, this is 560 * hardly worth the added code complexity. 561 * 562 * this should just be done through read() 563 */ 564 procfs_readdir(ap) 565 struct vop_readdir_args *ap; 566 { 567 struct uio *uio = ap->a_uio; 568 struct pfsdent d; 569 struct pfsdent *dp = &d; 570 struct pfsnode *pfs; 571 int error; 572 int count; 573 int i; 574 575 pfs = VTOPFS(ap->a_vp); 576 577 if (uio->uio_resid < UIO_MX) 578 return (EINVAL); 579 if (uio->uio_offset & (UIO_MX-1)) 580 return (EINVAL); 581 if (uio->uio_offset < 0) 582 return (EINVAL); 583 584 error = 0; 585 count = 0; 586 i = uio->uio_offset / UIO_MX; 587 588 switch (pfs->pfs_type) { 589 /* 590 * this is for the process-specific sub-directories. 591 * all that is needed to is copy out all the entries 592 * from the procent[] table (top of this file). 593 */ 594 case Pproc: { 595 while (uio->uio_resid >= UIO_MX) { 596 struct pfsnames *dt; 597 598 if (i >= Nprocent) 599 break; 600 601 dt = &procent[i]; 602 603 dp->d_reclen = UIO_MX; 604 dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, dt->d_pfstype); 605 dp->d_type = DT_REG; 606 dp->d_namlen = dt->d_namlen; 607 bcopy(dt->d_name, dp->d_name, sizeof(dt->d_name)-1); 608 error = uiomove((caddr_t) dp, UIO_MX, uio); 609 if (error) 610 break; 611 count += UIO_MX; 612 i++; 613 } 614 615 break; 616 617 } 618 619 /* 620 * this is for the root of the procfs filesystem 621 * what is needed is a special entry for "curproc" 622 * followed by an entry for each process on allproc 623 #ifdef PROCFS_ZOMBIE 624 * and zombproc. 625 #endif 626 */ 627 628 case Proot: { 629 int pcnt; 630 #ifdef PROCFS_ZOMBIE 631 int doingzomb = 0; 632 #endif 633 volatile struct proc *p; 634 635 p = allproc; 636 637 #define PROCFS_XFILES 1 /* number of other entries, like "curproc" */ 638 pcnt = PROCFS_XFILES; 639 640 while (p && uio->uio_resid >= UIO_MX) { 641 bzero((char *) dp, UIO_MX); 642 dp->d_type = DT_DIR; 643 dp->d_reclen = UIO_MX; 644 645 switch (i) { 646 case 0: 647 /* ship out entry for "curproc" */ 648 dp->d_fileno = PROCFS_FILENO(PID_MAX+1, Pproc); 649 dp->d_namlen = sprintf(dp->d_name, "curproc"); 650 break; 651 652 default: 653 if (pcnt >= i) { 654 dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc); 655 dp->d_namlen = sprintf(dp->d_name, "%ld", (long) p->p_pid); 656 } 657 658 p = p->p_next; 659 660 #ifdef PROCFS_ZOMBIE 661 if (p == 0 && doingzomb == 0) { 662 doingzomb = 1; 663 p = zombproc; 664 } 665 #endif 666 667 if (pcnt++ < i) 668 continue; 669 670 break; 671 } 672 error = uiomove((caddr_t) dp, UIO_MX, uio); 673 if (error) 674 break; 675 count += UIO_MX; 676 i++; 677 } 678 679 break; 680 681 } 682 683 default: 684 error = ENOTDIR; 685 break; 686 } 687 688 uio->uio_offset = i * UIO_MX; 689 690 return (error); 691 } 692 693 /* 694 * convert decimal ascii to pid_t 695 */ 696 static pid_t 697 atopid(b, len) 698 const char *b; 699 u_int len; 700 { 701 pid_t p = 0; 702 703 while (len--) { 704 char c = *b++; 705 if (c < '0' || c > '9') 706 return (NO_PID); 707 p = 10 * p + (c - '0'); 708 if (p > PID_MAX) 709 return (NO_PID); 710 } 711 712 return (p); 713 } 714 715 /* 716 * procfs vnode operations. 717 */ 718 int (**procfs_vnodeop_p)(); 719 struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 720 { &vop_default_desc, vn_default_error }, 721 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 722 { &vop_create_desc, procfs_create }, /* create */ 723 { &vop_mknod_desc, procfs_mknod }, /* mknod */ 724 { &vop_open_desc, procfs_open }, /* open */ 725 { &vop_close_desc, procfs_close }, /* close */ 726 { &vop_access_desc, procfs_access }, /* access */ 727 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 728 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 729 { &vop_read_desc, procfs_read }, /* read */ 730 { &vop_write_desc, procfs_write }, /* write */ 731 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */ 732 { &vop_select_desc, procfs_select }, /* select */ 733 { &vop_mmap_desc, procfs_mmap }, /* mmap */ 734 { &vop_fsync_desc, procfs_fsync }, /* fsync */ 735 { &vop_seek_desc, procfs_seek }, /* seek */ 736 { &vop_remove_desc, procfs_remove }, /* remove */ 737 { &vop_link_desc, procfs_link }, /* link */ 738 { &vop_rename_desc, procfs_rename }, /* rename */ 739 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */ 740 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */ 741 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 742 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 743 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 744 { &vop_abortop_desc, procfs_abortop }, /* abortop */ 745 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 746 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 747 { &vop_lock_desc, procfs_lock }, /* lock */ 748 { &vop_unlock_desc, procfs_unlock }, /* unlock */ 749 { &vop_bmap_desc, procfs_bmap }, /* bmap */ 750 { &vop_strategy_desc, procfs_strategy }, /* strategy */ 751 { &vop_print_desc, procfs_print }, /* print */ 752 { &vop_islocked_desc, procfs_islocked }, /* islocked */ 753 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 754 { &vop_advlock_desc, procfs_advlock }, /* advlock */ 755 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */ 756 { &vop_valloc_desc, procfs_valloc }, /* valloc */ 757 { &vop_vfree_desc, procfs_vfree }, /* vfree */ 758 { &vop_truncate_desc, procfs_truncate }, /* truncate */ 759 { &vop_update_desc, procfs_update }, /* update */ 760 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 761 }; 762 struct vnodeopv_desc procfs_vnodeop_opv_desc = 763 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 764