1 /* 2 * Copyright (c) 1993 The Regents of the University of California. 3 * Copyright (c) 1993 Jan-Simon Pendry 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * %sccs.include.redist.c% 10 * 11 * @(#)procfs_vnops.c 8.2 (Berkeley) 01/06/94 12 * 13 * From: 14 * $Id: procfs_vnops.c,v 3.2 1993/12/15 09:40:17 jsp Exp $ 15 */ 16 17 /* 18 * procfs vnode interface 19 */ 20 21 #include <sys/param.h> 22 #include <sys/systm.h> 23 #include <sys/time.h> 24 #include <sys/kernel.h> 25 #include <sys/file.h> 26 #include <sys/proc.h> 27 #include <sys/vnode.h> 28 #include <sys/namei.h> 29 #include <sys/malloc.h> 30 #include <sys/dirent.h> 31 #include <sys/resourcevar.h> 32 #include <miscfs/procfs/procfs.h> 33 #include <vm/vm.h> /* for PAGE_SIZE */ 34 35 /* 36 * Vnode Operations. 37 * 38 */ 39 40 /* 41 * This is a list of the valid names in the 42 * process-specific sub-directories. It is 43 * used in procfs_lookup and procfs_readdir 44 */ 45 static struct pfsnames { 46 u_short d_namlen; 47 char d_name[PROCFS_NAMELEN]; 48 pfstype d_pfstype; 49 } procent[] = { 50 #define N(s) sizeof(s)-1, s 51 /* namlen, nam, type */ 52 { N("file"), Pfile }, 53 { N("mem"), Pmem }, 54 { N("regs"), Pregs }, 55 { N("ctl"), Pctl }, 56 { N("status"), Pstatus }, 57 { N("note"), Pnote }, 58 { N("notepg"), Pnotepg }, 59 #undef N 60 }; 61 #define Nprocent (sizeof(procent)/sizeof(procent[0])) 62 63 static pid_t atopid __P((const char *, u_int)); 64 65 /* 66 * set things up for doing i/o on 67 * the pfsnode (vp). (vp) is locked 68 * on entry, and should be left locked 69 * on exit. 70 * 71 * for procfs we don't need to do anything 72 * in particular for i/o. all that is done 73 * is to support exclusive open on process 74 * memory images. 75 */ 76 procfs_open(ap) 77 struct vop_open_args *ap; 78 { 79 struct pfsnode *pfs = VTOPFS(ap->a_vp); 80 81 switch (pfs->pfs_type) { 82 case Pmem: 83 if (PFIND(pfs->pfs_pid) == 0) 84 return (ENOENT); /* was ESRCH, jsp */ 85 86 if ((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL) || 87 (pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)) 88 return (EBUSY); 89 90 91 if (ap->a_mode & FWRITE) 92 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 93 94 return (0); 95 96 default: 97 break; 98 } 99 100 return (0); 101 } 102 103 /* 104 * close the pfsnode (vp) after doing i/o. 105 * (vp) is not locked on entry or exit. 106 * 107 * nothing to do for procfs other than undo 108 * any exclusive open flag (see _open above). 109 */ 110 procfs_close(ap) 111 struct vop_close_args *ap; 112 { 113 struct pfsnode *pfs = VTOPFS(ap->a_vp); 114 115 switch (pfs->pfs_type) { 116 case Pmem: 117 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 118 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 119 break; 120 } 121 122 return (0); 123 } 124 125 /* 126 * do an ioctl operation on pfsnode (vp). 127 * (vp) is not locked on entry or exit. 128 */ 129 procfs_ioctl(ap) 130 struct vop_ioctl_args *ap; 131 { 132 133 return (ENOTTY); 134 } 135 136 /* 137 * do block mapping for pfsnode (vp). 138 * since we don't use the buffer cache 139 * for procfs this function should never 140 * be called. in any case, it's not clear 141 * what part of the kernel ever makes use 142 * of this function. for sanity, this is the 143 * usual no-op bmap, although returning 144 * (EIO) would be a reasonable alternative. 145 */ 146 procfs_bmap(ap) 147 struct vop_bmap_args *ap; 148 { 149 150 if (ap->a_vpp != NULL) 151 *ap->a_vpp = ap->a_vp; 152 if (ap->a_bnp != NULL) 153 *ap->a_bnp = ap->a_bn; 154 return (0); 155 } 156 157 /* 158 * _inactive is called when the pfsnode 159 * is vrele'd and the reference count goes 160 * to zero. (vp) will be on the vnode free 161 * list, so to get it back vget() must be 162 * used. 163 * 164 * for procfs, check if the process is still 165 * alive and if it isn't then just throw away 166 * the vnode by calling vgone(). this may 167 * be overkill and a waste of time since the 168 * chances are that the process will still be 169 * there and PFIND is not free. 170 * 171 * (vp) is not locked on entry or exit. 172 */ 173 procfs_inactive(ap) 174 struct vop_inactive_args *ap; 175 { 176 struct pfsnode *pfs = VTOPFS(ap->a_vp); 177 178 if (PFIND(pfs->pfs_pid) == 0) 179 vgone(ap->a_vp); 180 181 return (0); 182 } 183 184 /* 185 * _reclaim is called when getnewvnode() 186 * wants to make use of an entry on the vnode 187 * free list. at this time the filesystem needs 188 * to free any private data and remove the node 189 * from any private lists. 190 */ 191 procfs_reclaim(ap) 192 struct vop_reclaim_args *ap; 193 { 194 int error; 195 196 error = procfs_freevp(ap->a_vp); 197 return (error); 198 } 199 200 /* 201 * _print is used for debugging. 202 * just print a readable description 203 * of (vp). 204 */ 205 procfs_print(ap) 206 struct vop_print_args *ap; 207 { 208 struct pfsnode *pfs = VTOPFS(ap->a_vp); 209 210 printf("tag VT_PROCFS, pid %d, mode %x, flags %x\n", 211 pfs->pfs_pid, 212 pfs->pfs_mode, pfs->pfs_flags); 213 } 214 215 /* 216 * _abortop is called when operations such as 217 * rename and create fail. this entry is responsible 218 * for undoing any side-effects caused by the lookup. 219 * this will always include freeing the pathname buffer. 220 */ 221 procfs_abortop(ap) 222 struct vop_abortop_args *ap; 223 { 224 225 if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) 226 FREE(ap->a_cnp->cn_pnbuf, M_NAMEI); 227 return (0); 228 } 229 230 /* 231 * generic entry point for unsupported operations 232 */ 233 procfs_badop() 234 { 235 236 return (EIO); 237 } 238 239 /* 240 * Invent attributes for pfsnode (vp) and store 241 * them in (vap). 242 * Directories lengths are returned as zero since 243 * any real length would require the genuine size 244 * to be computed, and nothing cares anyway. 245 * 246 * this is relatively minimal for procfs. 247 */ 248 procfs_getattr(ap) 249 struct vop_getattr_args *ap; 250 { 251 struct pfsnode *pfs = VTOPFS(ap->a_vp); 252 struct vattr *vap = ap->a_vap; 253 struct proc *procp; 254 int error; 255 256 /* first check the process still exists */ 257 procp = PFIND(pfs->pfs_pid); 258 if (procp == 0) 259 return (ENOENT); 260 261 error = 0; 262 263 /* start by zeroing out the attributes */ 264 VATTR_NULL(vap); 265 266 /* next do all the common fields */ 267 vap->va_type = ap->a_vp->v_type; 268 vap->va_mode = pfs->pfs_mode; 269 vap->va_fileid = pfs->pfs_fileno; 270 vap->va_flags = 0; 271 vap->va_blocksize = PAGE_SIZE; 272 vap->va_bytes = vap->va_size = 0; 273 274 /* 275 * Make all times be current TOD. 276 * It would be possible to get the process start 277 * time from the p_stat structure, but there's 278 * no "file creation" time stamp anyway, and the 279 * p_stat structure is not addressible if u. gets 280 * swapped out for that process. 281 */ 282 microtime(&vap->va_ctime); 283 vap->va_atime = vap->va_mtime = vap->va_ctime; 284 285 /* 286 * now do the object specific fields 287 * 288 * The size could be set from struct reg, but it's hardly 289 * worth the trouble, and it puts some (potentially) machine 290 * dependent data into this machine-independent code. If it 291 * becomes important then this function should break out into 292 * a per-file stat function in the corresponding .c file. 293 */ 294 295 switch (pfs->pfs_type) { 296 case Proot: 297 vap->va_nlink = 2; 298 vap->va_uid = 0; 299 vap->va_gid = 0; 300 break; 301 302 case Pproc: 303 vap->va_nlink = 2; 304 vap->va_uid = procp->p_ucred->cr_uid; 305 vap->va_gid = procp->p_ucred->cr_gid; 306 break; 307 308 case Pfile: 309 error = EOPNOTSUPP; 310 break; 311 312 case Pmem: 313 vap->va_nlink = 1; 314 vap->va_bytes = vap->va_size = 315 ctob(procp->p_vmspace->vm_tsize + 316 procp->p_vmspace->vm_dsize + 317 procp->p_vmspace->vm_ssize); 318 vap->va_uid = procp->p_ucred->cr_uid; 319 vap->va_gid = procp->p_ucred->cr_gid; 320 break; 321 322 case Pregs: 323 case Pctl: 324 case Pstatus: 325 case Pnote: 326 case Pnotepg: 327 vap->va_nlink = 1; 328 vap->va_uid = procp->p_ucred->cr_uid; 329 vap->va_gid = procp->p_ucred->cr_gid; 330 break; 331 332 default: 333 panic("procfs_getattr"); 334 } 335 336 return (error); 337 } 338 339 procfs_setattr(ap) 340 struct vop_setattr_args *ap; 341 { 342 /* 343 * just fake out attribute setting 344 * it's not good to generate an error 345 * return, otherwise things like creat() 346 * will fail when they try to set the 347 * file length to 0. worse, this means 348 * that echo $note > /proc/$pid/note will fail. 349 */ 350 351 return (0); 352 } 353 354 /* 355 * implement access checking. 356 * 357 * something very similar to this code is duplicated 358 * throughout the 4bsd kernel and should be moved 359 * into kern/vfs_subr.c sometime. 360 * 361 * actually, the check for super-user is slightly 362 * broken since it will allow read access to write-only 363 * objects. this doesn't cause any particular trouble 364 * but does mean that the i/o entry points need to check 365 * that the operation really does make sense. 366 */ 367 procfs_access(ap) 368 struct vop_access_args *ap; 369 { 370 struct vattr *vap; 371 struct vattr vattr; 372 int error; 373 374 /* 375 * If you're the super-user, 376 * you always get access. 377 */ 378 if (ap->a_cred->cr_uid == (uid_t) 0) 379 return (0); 380 vap = &vattr; 381 if (error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p)) 382 return (error); 383 384 /* 385 * Access check is based on only one of owner, group, public. 386 * If not owner, then check group. If not a member of the 387 * group, then check public access. 388 */ 389 if (ap->a_cred->cr_uid != vap->va_uid) { 390 gid_t *gp; 391 int i; 392 393 (ap->a_mode) >>= 3; 394 gp = ap->a_cred->cr_groups; 395 for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++) 396 if (vap->va_gid == *gp) 397 goto found; 398 ap->a_mode >>= 3; 399 found: 400 ; 401 } 402 403 if ((vap->va_mode & ap->a_mode) == ap->a_mode) 404 return (0); 405 406 return (EACCES); 407 } 408 409 /* 410 * lookup. this is incredibly complicated in the 411 * general case, however for most pseudo-filesystems 412 * very little needs to be done. 413 * 414 * unless you want to get a migraine, just make sure your 415 * filesystem doesn't do any locking of its own. otherwise 416 * read and inwardly digest ufs_lookup(). 417 */ 418 procfs_lookup(ap) 419 struct vop_lookup_args *ap; 420 { 421 struct componentname *cnp = ap->a_cnp; 422 struct vnode **vpp = ap->a_vpp; 423 struct vnode *dvp = ap->a_dvp; 424 char *pname = cnp->cn_nameptr; 425 int error = 0; 426 int flag; 427 pid_t pid; 428 struct vnode *nvp; 429 struct pfsnode *pfs; 430 struct proc *procp; 431 int mode; 432 pfstype pfs_type; 433 int i; 434 435 if (cnp->cn_namelen == 1 && *pname == '.') { 436 *vpp = dvp; 437 VREF(dvp); 438 /*VOP_LOCK(dvp);*/ 439 return (0); 440 } 441 442 *vpp = NULL; 443 444 pfs = VTOPFS(dvp); 445 switch (pfs->pfs_type) { 446 case Proot: 447 if (cnp->cn_flags & ISDOTDOT) 448 return (EIO); 449 450 if (CNEQ(cnp, "curproc", 7)) 451 pid = cnp->cn_proc->p_pid; 452 else 453 pid = atopid(pname, cnp->cn_namelen); 454 if (pid == NO_PID) 455 return (ENOENT); 456 457 procp = PFIND(pid); 458 if (procp == 0) 459 return (ENOENT); 460 461 error = procfs_allocvp(dvp->v_mount, &nvp, pid, Pproc); 462 if (error) 463 return (error); 464 465 nvp->v_type = VDIR; 466 pfs = VTOPFS(nvp); 467 468 *vpp = nvp; 469 return (0); 470 471 case Pproc: 472 if (cnp->cn_flags & ISDOTDOT) { 473 error = procfs_root(dvp->v_mount, vpp); 474 return (error); 475 } 476 477 procp = PFIND(pfs->pfs_pid); 478 if (procp == 0) 479 return (ENOENT); 480 481 for (i = 0; i < Nprocent; i++) { 482 struct pfsnames *dp = &procent[i]; 483 484 if (cnp->cn_namelen == dp->d_namlen && 485 bcmp(pname, dp->d_name, dp->d_namlen) == 0) { 486 pfs_type = dp->d_pfstype; 487 goto found; 488 } 489 } 490 return (ENOENT); 491 492 found: 493 if (pfs_type == Pfile) { 494 nvp = procfs_findtextvp(procp); 495 if (nvp) { 496 VREF(nvp); 497 VOP_LOCK(nvp); 498 } else { 499 error = ENXIO; 500 } 501 } else { 502 error = procfs_allocvp(dvp->v_mount, &nvp, 503 pfs->pfs_pid, pfs_type); 504 if (error) 505 return (error); 506 507 nvp->v_type = VREG; 508 pfs = VTOPFS(nvp); 509 } 510 *vpp = nvp; 511 return (error); 512 513 default: 514 return (ENOTDIR); 515 } 516 } 517 518 /* 519 * readdir returns directory entries from pfsnode (vp). 520 * 521 * the strategy here with procfs is to generate a single 522 * directory entry at a time (struct pfsdent) and then 523 * copy that out to userland using uiomove. a more efficent 524 * though more complex implementation, would try to minimize 525 * the number of calls to uiomove(). for procfs, this is 526 * hardly worth the added code complexity. 527 * 528 * this should just be done through read() 529 */ 530 procfs_readdir(ap) 531 struct vop_readdir_args *ap; 532 { 533 struct uio *uio = ap->a_uio; 534 struct pfsdent d; 535 struct pfsdent *dp = &d; 536 struct pfsnode *pfs; 537 int error; 538 int count; 539 int i; 540 541 pfs = VTOPFS(ap->a_vp); 542 543 if (uio->uio_resid < UIO_MX) 544 return (EINVAL); 545 if (uio->uio_offset & (UIO_MX-1)) 546 return (EINVAL); 547 if (uio->uio_offset < 0) 548 return (EINVAL); 549 550 error = 0; 551 count = 0; 552 i = uio->uio_offset / UIO_MX; 553 554 switch (pfs->pfs_type) { 555 /* 556 * this is for the process-specific sub-directories. 557 * all that is needed to is copy out all the entries 558 * from the procent[] table (top of this file). 559 */ 560 case Pproc: { 561 while (uio->uio_resid >= UIO_MX) { 562 struct pfsnames *dt; 563 564 if (i >= Nprocent) 565 break; 566 567 dt = &procent[i]; 568 569 dp->d_reclen = UIO_MX; 570 dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, dt->d_pfstype); 571 dp->d_type = DT_REG; 572 dp->d_namlen = dt->d_namlen; 573 bcopy(dt->d_name, dp->d_name, sizeof(dt->d_name)-1); 574 error = uiomove((caddr_t) dp, UIO_MX, uio); 575 if (error) 576 break; 577 count += UIO_MX; 578 i++; 579 } 580 581 break; 582 583 } 584 585 /* 586 * this is for the root of the procfs filesystem 587 * what is needed is a special entry for "curproc" 588 * followed by an entry for each process on allproc 589 #ifdef PROCFS_ZOMBIE 590 * and zombproc. 591 #endif 592 */ 593 594 case Proot: { 595 int pcnt; 596 #ifdef PROCFS_ZOMBIE 597 int doingzomb = 0; 598 #endif 599 volatile struct proc *p; 600 601 p = allproc; 602 603 #define PROCFS_XFILES 1 /* number of other entries, like "curproc" */ 604 pcnt = PROCFS_XFILES; 605 606 while (p && uio->uio_resid >= UIO_MX) { 607 bzero((char *) dp, UIO_MX); 608 dp->d_type = DT_DIR; 609 dp->d_reclen = UIO_MX; 610 611 switch (i) { 612 case 0: 613 /* ship out entry for "curproc" */ 614 dp->d_fileno = PROCFS_FILENO(PID_MAX+1, Pproc); 615 dp->d_namlen = sprintf(dp->d_name, "curproc"); 616 break; 617 618 default: 619 if (pcnt >= i) { 620 dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc); 621 dp->d_namlen = sprintf(dp->d_name, "%ld", (long) p->p_pid); 622 } 623 624 p = p->p_next; 625 626 #ifdef PROCFS_ZOMBIE 627 if (p == 0 && doingzomb == 0) { 628 doingzomb = 1; 629 p = zombproc; 630 } 631 #endif 632 633 if (pcnt++ < i) 634 continue; 635 636 break; 637 } 638 error = uiomove((caddr_t) dp, UIO_MX, uio); 639 if (error) 640 break; 641 count += UIO_MX; 642 i++; 643 } 644 645 break; 646 647 } 648 649 default: 650 error = ENOTDIR; 651 break; 652 } 653 654 uio->uio_offset = i * UIO_MX; 655 656 return (error); 657 } 658 659 /* 660 * convert decimal ascii to pid_t 661 */ 662 static pid_t 663 atopid(b, len) 664 const char *b; 665 u_int len; 666 { 667 pid_t p = 0; 668 669 while (len--) { 670 char c = *b++; 671 if (c < '0' || c > '9') 672 return (NO_PID); 673 p = 10 * p + (c - '0'); 674 if (p > PID_MAX) 675 return (NO_PID); 676 } 677 678 return (p); 679 } 680 681 /* 682 * procfs vnode operations. 683 */ 684 int (**procfs_vnodeop_p)(); 685 struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 686 { &vop_default_desc, vn_default_error }, 687 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 688 { &vop_create_desc, procfs_create }, /* create */ 689 { &vop_mknod_desc, procfs_mknod }, /* mknod */ 690 { &vop_open_desc, procfs_open }, /* open */ 691 { &vop_close_desc, procfs_close }, /* close */ 692 { &vop_access_desc, procfs_access }, /* access */ 693 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 694 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 695 { &vop_read_desc, procfs_read }, /* read */ 696 { &vop_write_desc, procfs_write }, /* write */ 697 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */ 698 { &vop_select_desc, procfs_select }, /* select */ 699 { &vop_mmap_desc, procfs_mmap }, /* mmap */ 700 { &vop_fsync_desc, procfs_fsync }, /* fsync */ 701 { &vop_seek_desc, procfs_seek }, /* seek */ 702 { &vop_remove_desc, procfs_remove }, /* remove */ 703 { &vop_link_desc, procfs_link }, /* link */ 704 { &vop_rename_desc, procfs_rename }, /* rename */ 705 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */ 706 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */ 707 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 708 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 709 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 710 { &vop_abortop_desc, procfs_abortop }, /* abortop */ 711 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 712 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 713 { &vop_lock_desc, procfs_lock }, /* lock */ 714 { &vop_unlock_desc, procfs_unlock }, /* unlock */ 715 { &vop_bmap_desc, procfs_bmap }, /* bmap */ 716 { &vop_strategy_desc, procfs_strategy }, /* strategy */ 717 { &vop_print_desc, procfs_print }, /* print */ 718 { &vop_islocked_desc, procfs_islocked }, /* islocked */ 719 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 720 { &vop_advlock_desc, procfs_advlock }, /* advlock */ 721 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */ 722 { &vop_valloc_desc, procfs_valloc }, /* valloc */ 723 { &vop_vfree_desc, procfs_vfree }, /* vfree */ 724 { &vop_truncate_desc, procfs_truncate }, /* truncate */ 725 { &vop_update_desc, procfs_update }, /* update */ 726 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 727 }; 728 struct vnodeopv_desc procfs_vnodeop_opv_desc = 729 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 730