1 /* $NetBSD: procfs_vnops.c,v 1.89 2002/05/09 15:44:45 thorpej Exp $ */ 2 3 /* 4 * Copyright (c) 1993 Jan-Simon Pendry 5 * Copyright (c) 1993, 1995 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Jan-Simon Pendry. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 40 */ 41 42 /* 43 * procfs vnode interface 44 */ 45 46 #include <sys/cdefs.h> 47 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.89 2002/05/09 15:44:45 thorpej Exp $"); 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/time.h> 52 #include <sys/kernel.h> 53 #include <sys/file.h> 54 #include <sys/proc.h> 55 #include <sys/vnode.h> 56 #include <sys/namei.h> 57 #include <sys/malloc.h> 58 #include <sys/mount.h> 59 #include <sys/dirent.h> 60 #include <sys/resourcevar.h> 61 #include <sys/stat.h> 62 #include <sys/ptrace.h> 63 64 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 65 66 #include <machine/reg.h> 67 68 #include <miscfs/genfs/genfs.h> 69 #include <miscfs/procfs/procfs.h> 70 71 /* 72 * Vnode Operations. 73 * 74 */ 75 76 static int procfs_validfile_linux __P((struct proc *, struct mount *)); 77 78 /* 79 * This is a list of the valid names in the 80 * process-specific sub-directories. It is 81 * used in procfs_lookup and procfs_readdir 82 */ 83 const struct proc_target { 84 u_char pt_type; 85 u_char pt_namlen; 86 char *pt_name; 87 pfstype pt_pfstype; 88 int (*pt_valid) __P((struct proc *, struct mount *)); 89 } proc_targets[] = { 90 #define N(s) sizeof(s)-1, s 91 /* name type validp */ 92 { DT_DIR, N("."), Pproc, NULL }, 93 { DT_DIR, N(".."), Proot, NULL }, 94 { DT_REG, N("file"), Pfile, procfs_validfile }, 95 { DT_REG, N("mem"), Pmem, NULL }, 96 { DT_REG, N("regs"), Pregs, procfs_validregs }, 97 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 98 { DT_REG, N("ctl"), Pctl, NULL }, 99 { DT_REG, N("status"), Pstatus, NULL }, 100 { DT_REG, N("note"), Pnote, NULL }, 101 { DT_REG, N("notepg"), Pnotepg, NULL }, 102 { DT_REG, N("map"), Pmap, procfs_validmap }, 103 { DT_REG, N("maps"), Pmaps, procfs_validmap }, 104 { DT_REG, N("cmdline"), Pcmdline, NULL }, 105 { DT_REG, N("exe"), Pfile, procfs_validfile_linux }, 106 #ifdef __HAVE_PROCFS_MACHDEP 107 PROCFS_MACHDEP_NODETYPE_DEFNS 108 #endif 109 #undef N 110 }; 111 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 112 113 /* 114 * List of files in the root directory. Note: the validate function will 115 * be called with p == NULL for these ones. 116 */ 117 struct proc_target proc_root_targets[] = { 118 #define N(s) sizeof(s)-1, s 119 /* name type validp */ 120 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux }, 121 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux }, 122 #undef N 123 }; 124 static int nproc_root_targets = 125 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]); 126 127 int procfs_lookup __P((void *)); 128 #define procfs_create genfs_eopnotsupp_rele 129 #define procfs_mknod genfs_eopnotsupp_rele 130 int procfs_open __P((void *)); 131 int procfs_close __P((void *)); 132 int procfs_access __P((void *)); 133 int procfs_getattr __P((void *)); 134 int procfs_setattr __P((void *)); 135 #define procfs_read procfs_rw 136 #define procfs_write procfs_rw 137 #define procfs_fcntl genfs_fcntl 138 #define procfs_ioctl genfs_enoioctl 139 #define procfs_poll genfs_poll 140 #define procfs_revoke genfs_revoke 141 #define procfs_fsync genfs_nullop 142 #define procfs_seek genfs_nullop 143 #define procfs_remove genfs_eopnotsupp_rele 144 int procfs_link __P((void *)); 145 #define procfs_rename genfs_eopnotsupp_rele 146 #define procfs_mkdir genfs_eopnotsupp_rele 147 #define procfs_rmdir genfs_eopnotsupp_rele 148 int procfs_symlink __P((void *)); 149 int procfs_readdir __P((void *)); 150 int procfs_readlink __P((void *)); 151 #define procfs_abortop genfs_abortop 152 int procfs_inactive __P((void *)); 153 int procfs_reclaim __P((void *)); 154 #define procfs_lock genfs_lock 155 #define procfs_unlock genfs_unlock 156 #define procfs_bmap genfs_badop 157 #define procfs_strategy genfs_badop 158 int procfs_print __P((void *)); 159 int procfs_pathconf __P((void *)); 160 #define procfs_islocked genfs_islocked 161 #define procfs_advlock genfs_einval 162 #define procfs_blkatoff genfs_eopnotsupp 163 #define procfs_valloc genfs_eopnotsupp 164 #define procfs_vfree genfs_nullop 165 #define procfs_truncate genfs_eopnotsupp 166 #define procfs_update genfs_nullop 167 #define procfs_bwrite genfs_eopnotsupp 168 #define procfs_putpages genfs_null_putpages 169 170 static pid_t atopid __P((const char *, u_int)); 171 172 /* 173 * procfs vnode operations. 174 */ 175 int (**procfs_vnodeop_p) __P((void *)); 176 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 177 { &vop_default_desc, vn_default_error }, 178 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 179 { &vop_create_desc, procfs_create }, /* create */ 180 { &vop_mknod_desc, procfs_mknod }, /* mknod */ 181 { &vop_open_desc, procfs_open }, /* open */ 182 { &vop_close_desc, procfs_close }, /* close */ 183 { &vop_access_desc, procfs_access }, /* access */ 184 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 185 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 186 { &vop_read_desc, procfs_read }, /* read */ 187 { &vop_write_desc, procfs_write }, /* write */ 188 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */ 189 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */ 190 { &vop_poll_desc, procfs_poll }, /* poll */ 191 { &vop_revoke_desc, procfs_revoke }, /* revoke */ 192 { &vop_fsync_desc, procfs_fsync }, /* fsync */ 193 { &vop_seek_desc, procfs_seek }, /* seek */ 194 { &vop_remove_desc, procfs_remove }, /* remove */ 195 { &vop_link_desc, procfs_link }, /* link */ 196 { &vop_rename_desc, procfs_rename }, /* rename */ 197 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */ 198 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */ 199 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 200 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 201 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 202 { &vop_abortop_desc, procfs_abortop }, /* abortop */ 203 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 204 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 205 { &vop_lock_desc, procfs_lock }, /* lock */ 206 { &vop_unlock_desc, procfs_unlock }, /* unlock */ 207 { &vop_bmap_desc, procfs_bmap }, /* bmap */ 208 { &vop_strategy_desc, procfs_strategy }, /* strategy */ 209 { &vop_print_desc, procfs_print }, /* print */ 210 { &vop_islocked_desc, procfs_islocked }, /* islocked */ 211 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 212 { &vop_advlock_desc, procfs_advlock }, /* advlock */ 213 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */ 214 { &vop_valloc_desc, procfs_valloc }, /* valloc */ 215 { &vop_vfree_desc, procfs_vfree }, /* vfree */ 216 { &vop_truncate_desc, procfs_truncate }, /* truncate */ 217 { &vop_update_desc, procfs_update }, /* update */ 218 { &vop_putpages_desc, procfs_putpages }, /* putpages */ 219 { NULL, NULL } 220 }; 221 const struct vnodeopv_desc procfs_vnodeop_opv_desc = 222 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 223 /* 224 * set things up for doing i/o on 225 * the pfsnode (vp). (vp) is locked 226 * on entry, and should be left locked 227 * on exit. 228 * 229 * for procfs we don't need to do anything 230 * in particular for i/o. all that is done 231 * is to support exclusive open on process 232 * memory images. 233 */ 234 int 235 procfs_open(v) 236 void *v; 237 { 238 struct vop_open_args /* { 239 struct vnode *a_vp; 240 int a_mode; 241 struct ucred *a_cred; 242 struct proc *a_p; 243 } */ *ap = v; 244 struct pfsnode *pfs = VTOPFS(ap->a_vp); 245 struct proc *p1, *p2; 246 int error; 247 248 p1 = ap->a_p; /* tracer */ 249 p2 = PFIND(pfs->pfs_pid); /* traced */ 250 251 if (p2 == NULL) 252 return (ENOENT); /* was ESRCH, jsp */ 253 254 switch (pfs->pfs_type) { 255 case Pmem: 256 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 257 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) 258 return (EBUSY); 259 260 if ((error = process_checkioperm(p1, p2)) != 0) 261 return (error); 262 263 if (ap->a_mode & FWRITE) 264 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 265 266 return (0); 267 268 default: 269 break; 270 } 271 272 return (0); 273 } 274 275 /* 276 * close the pfsnode (vp) after doing i/o. 277 * (vp) is not locked on entry or exit. 278 * 279 * nothing to do for procfs other than undo 280 * any exclusive open flag (see _open above). 281 */ 282 int 283 procfs_close(v) 284 void *v; 285 { 286 struct vop_close_args /* { 287 struct vnode *a_vp; 288 int a_fflag; 289 struct ucred *a_cred; 290 struct proc *a_p; 291 } */ *ap = v; 292 struct pfsnode *pfs = VTOPFS(ap->a_vp); 293 294 switch (pfs->pfs_type) { 295 case Pmem: 296 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 297 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 298 break; 299 300 default: 301 break; 302 } 303 304 return (0); 305 } 306 307 /* 308 * _inactive is called when the pfsnode 309 * is vrele'd and the reference count goes 310 * to zero. (vp) will be on the vnode free 311 * list, so to get it back vget() must be 312 * used. 313 * 314 * for procfs, check if the process is still 315 * alive and if it isn't then just throw away 316 * the vnode by calling vgone(). this may 317 * be overkill and a waste of time since the 318 * chances are that the process will still be 319 * there and PFIND is not free. 320 * 321 * (vp) is locked on entry, but must be unlocked on exit. 322 */ 323 int 324 procfs_inactive(v) 325 void *v; 326 { 327 struct vop_inactive_args /* { 328 struct vnode *a_vp; 329 struct proc *a_p; 330 } */ *ap = v; 331 struct pfsnode *pfs = VTOPFS(ap->a_vp); 332 333 VOP_UNLOCK(ap->a_vp, 0); 334 if (PFIND(pfs->pfs_pid) == 0) 335 vgone(ap->a_vp); 336 337 return (0); 338 } 339 340 /* 341 * _reclaim is called when getnewvnode() 342 * wants to make use of an entry on the vnode 343 * free list. at this time the filesystem needs 344 * to free any private data and remove the node 345 * from any private lists. 346 */ 347 int 348 procfs_reclaim(v) 349 void *v; 350 { 351 struct vop_reclaim_args /* { 352 struct vnode *a_vp; 353 } */ *ap = v; 354 355 return (procfs_freevp(ap->a_vp)); 356 } 357 358 /* 359 * Return POSIX pathconf information applicable to special devices. 360 */ 361 int 362 procfs_pathconf(v) 363 void *v; 364 { 365 struct vop_pathconf_args /* { 366 struct vnode *a_vp; 367 int a_name; 368 register_t *a_retval; 369 } */ *ap = v; 370 371 switch (ap->a_name) { 372 case _PC_LINK_MAX: 373 *ap->a_retval = LINK_MAX; 374 return (0); 375 case _PC_MAX_CANON: 376 *ap->a_retval = MAX_CANON; 377 return (0); 378 case _PC_MAX_INPUT: 379 *ap->a_retval = MAX_INPUT; 380 return (0); 381 case _PC_PIPE_BUF: 382 *ap->a_retval = PIPE_BUF; 383 return (0); 384 case _PC_CHOWN_RESTRICTED: 385 *ap->a_retval = 1; 386 return (0); 387 case _PC_VDISABLE: 388 *ap->a_retval = _POSIX_VDISABLE; 389 return (0); 390 case _PC_SYNC_IO: 391 *ap->a_retval = 1; 392 return (0); 393 default: 394 return (EINVAL); 395 } 396 /* NOTREACHED */ 397 } 398 399 /* 400 * _print is used for debugging. 401 * just print a readable description 402 * of (vp). 403 */ 404 int 405 procfs_print(v) 406 void *v; 407 { 408 struct vop_print_args /* { 409 struct vnode *a_vp; 410 } */ *ap = v; 411 struct pfsnode *pfs = VTOPFS(ap->a_vp); 412 413 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 414 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 415 return 0; 416 } 417 418 int 419 procfs_link(v) 420 void *v; 421 { 422 struct vop_link_args /* { 423 struct vnode *a_dvp; 424 struct vnode *a_vp; 425 struct componentname *a_cnp; 426 } */ *ap = v; 427 428 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 429 vput(ap->a_dvp); 430 return (EROFS); 431 } 432 433 int 434 procfs_symlink(v) 435 void *v; 436 { 437 struct vop_symlink_args /* { 438 struct vnode *a_dvp; 439 struct vnode **a_vpp; 440 struct componentname *a_cnp; 441 struct vattr *a_vap; 442 char *a_target; 443 } */ *ap = v; 444 445 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 446 vput(ap->a_dvp); 447 return (EROFS); 448 } 449 450 /* 451 * Invent attributes for pfsnode (vp) and store 452 * them in (vap). 453 * Directories lengths are returned as zero since 454 * any real length would require the genuine size 455 * to be computed, and nothing cares anyway. 456 * 457 * this is relatively minimal for procfs. 458 */ 459 int 460 procfs_getattr(v) 461 void *v; 462 { 463 struct vop_getattr_args /* { 464 struct vnode *a_vp; 465 struct vattr *a_vap; 466 struct ucred *a_cred; 467 struct proc *a_p; 468 } */ *ap = v; 469 struct pfsnode *pfs = VTOPFS(ap->a_vp); 470 struct vattr *vap = ap->a_vap; 471 struct proc *procp; 472 struct timeval tv; 473 int error; 474 475 /* first check the process still exists */ 476 switch (pfs->pfs_type) { 477 case Proot: 478 case Pcurproc: 479 case Pself: 480 procp = 0; 481 break; 482 483 default: 484 procp = PFIND(pfs->pfs_pid); 485 if (procp == 0) 486 return (ENOENT); 487 break; 488 } 489 490 error = 0; 491 492 /* start by zeroing out the attributes */ 493 VATTR_NULL(vap); 494 495 /* next do all the common fields */ 496 vap->va_type = ap->a_vp->v_type; 497 vap->va_mode = pfs->pfs_mode; 498 vap->va_fileid = pfs->pfs_fileno; 499 vap->va_flags = 0; 500 vap->va_blocksize = PAGE_SIZE; 501 502 /* 503 * Make all times be current TOD. 504 * It would be possible to get the process start 505 * time from the p_stat structure, but there's 506 * no "file creation" time stamp anyway, and the 507 * p_stat structure is not addressible if u. gets 508 * swapped out for that process. 509 */ 510 microtime(&tv); 511 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime); 512 vap->va_atime = vap->va_mtime = vap->va_ctime; 513 514 switch (pfs->pfs_type) { 515 case Pmem: 516 case Pregs: 517 case Pfpregs: 518 #if defined(__HAVE_PROCFS_MACHDEP) && defined(PROCFS_MACHDEP_PROTECT_CASES) 519 PROCFS_MACHDEP_PROTECT_CASES 520 #endif 521 /* 522 * If the process has exercised some setuid or setgid 523 * privilege, then rip away read/write permission so 524 * that only root can gain access. 525 */ 526 if (procp->p_flag & P_SUGID) 527 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 528 /* FALLTHROUGH */ 529 case Pctl: 530 case Pstatus: 531 case Pnote: 532 case Pnotepg: 533 case Pmap: 534 case Pmaps: 535 case Pcmdline: 536 vap->va_nlink = 1; 537 vap->va_uid = procp->p_ucred->cr_uid; 538 vap->va_gid = procp->p_ucred->cr_gid; 539 break; 540 case Pmeminfo: 541 case Pcpuinfo: 542 vap->va_nlink = 1; 543 vap->va_uid = vap->va_gid = 0; 544 break; 545 546 default: 547 break; 548 } 549 550 /* 551 * now do the object specific fields 552 * 553 * The size could be set from struct reg, but it's hardly 554 * worth the trouble, and it puts some (potentially) machine 555 * dependent data into this machine-independent code. If it 556 * becomes important then this function should break out into 557 * a per-file stat function in the corresponding .c file. 558 */ 559 560 switch (pfs->pfs_type) { 561 case Proot: 562 /* 563 * Set nlink to 1 to tell fts(3) we don't actually know. 564 */ 565 vap->va_nlink = 1; 566 vap->va_uid = 0; 567 vap->va_gid = 0; 568 vap->va_bytes = vap->va_size = DEV_BSIZE; 569 break; 570 571 case Pcurproc: { 572 char buf[16]; /* should be enough */ 573 vap->va_nlink = 1; 574 vap->va_uid = 0; 575 vap->va_gid = 0; 576 vap->va_bytes = vap->va_size = 577 sprintf(buf, "%ld", (long)curproc->p_pid); 578 break; 579 } 580 581 case Pself: 582 vap->va_nlink = 1; 583 vap->va_uid = 0; 584 vap->va_gid = 0; 585 vap->va_bytes = vap->va_size = sizeof("curproc"); 586 break; 587 588 case Pproc: 589 vap->va_nlink = 2; 590 vap->va_uid = procp->p_ucred->cr_uid; 591 vap->va_gid = procp->p_ucred->cr_gid; 592 vap->va_bytes = vap->va_size = DEV_BSIZE; 593 break; 594 595 case Pfile: 596 error = EOPNOTSUPP; 597 break; 598 599 case Pmem: 600 vap->va_bytes = vap->va_size = 601 ctob(procp->p_vmspace->vm_tsize + 602 procp->p_vmspace->vm_dsize + 603 procp->p_vmspace->vm_ssize); 604 break; 605 606 #if defined(PT_GETREGS) || defined(PT_SETREGS) 607 case Pregs: 608 vap->va_bytes = vap->va_size = sizeof(struct reg); 609 break; 610 #endif 611 612 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 613 case Pfpregs: 614 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 615 break; 616 #endif 617 618 case Pctl: 619 case Pstatus: 620 case Pnote: 621 case Pnotepg: 622 case Pcmdline: 623 case Pmeminfo: 624 case Pcpuinfo: 625 vap->va_bytes = vap->va_size = 0; 626 break; 627 case Pmap: 628 case Pmaps: 629 /* 630 * Advise a larger blocksize for the map files, so that 631 * they may be read in one pass. 632 */ 633 vap->va_blocksize = 4 * PAGE_SIZE; 634 vap->va_bytes = vap->va_size = 0; 635 break; 636 637 #ifdef __HAVE_PROCFS_MACHDEP 638 PROCFS_MACHDEP_NODETYPE_CASES 639 error = procfs_machdep_getattr(ap->a_vp, vap, procp); 640 break; 641 #endif 642 643 default: 644 panic("procfs_getattr"); 645 } 646 647 return (error); 648 } 649 650 /*ARGSUSED*/ 651 int 652 procfs_setattr(v) 653 void *v; 654 { 655 /* 656 * just fake out attribute setting 657 * it's not good to generate an error 658 * return, otherwise things like creat() 659 * will fail when they try to set the 660 * file length to 0. worse, this means 661 * that echo $note > /proc/$pid/note will fail. 662 */ 663 664 return (0); 665 } 666 667 /* 668 * implement access checking. 669 * 670 * actually, the check for super-user is slightly 671 * broken since it will allow read access to write-only 672 * objects. this doesn't cause any particular trouble 673 * but does mean that the i/o entry points need to check 674 * that the operation really does make sense. 675 */ 676 int 677 procfs_access(v) 678 void *v; 679 { 680 struct vop_access_args /* { 681 struct vnode *a_vp; 682 int a_mode; 683 struct ucred *a_cred; 684 struct proc *a_p; 685 } */ *ap = v; 686 struct vattr va; 687 int error; 688 689 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0) 690 return (error); 691 692 return (vaccess(va.va_type, va.va_mode, 693 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred)); 694 } 695 696 /* 697 * lookup. this is incredibly complicated in the 698 * general case, however for most pseudo-filesystems 699 * very little needs to be done. 700 * 701 * Locking isn't hard here, just poorly documented. 702 * 703 * If we're looking up ".", just vref the parent & return it. 704 * 705 * If we're looking up "..", unlock the parent, and lock "..". If everything 706 * went ok, and we're on the last component and the caller requested the 707 * parent locked, try to re-lock the parent. We do this to prevent lock 708 * races. 709 * 710 * For anything else, get the needed node. Then unlock the parent if not 711 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 712 * parent in the .. case). 713 * 714 * We try to exit with the parent locked in error cases. 715 */ 716 int 717 procfs_lookup(v) 718 void *v; 719 { 720 struct vop_lookup_args /* { 721 struct vnode * a_dvp; 722 struct vnode ** a_vpp; 723 struct componentname * a_cnp; 724 } */ *ap = v; 725 struct componentname *cnp = ap->a_cnp; 726 struct vnode **vpp = ap->a_vpp; 727 struct vnode *dvp = ap->a_dvp; 728 const char *pname = cnp->cn_nameptr; 729 const struct proc_target *pt = NULL; 730 struct vnode *fvp; 731 pid_t pid; 732 struct pfsnode *pfs; 733 struct proc *p = NULL; 734 int i, error, wantpunlock, iscurproc = 0, isself = 0; 735 736 *vpp = NULL; 737 cnp->cn_flags &= ~PDIRUNLOCK; 738 739 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) 740 return (EROFS); 741 742 if (cnp->cn_namelen == 1 && *pname == '.') { 743 *vpp = dvp; 744 VREF(dvp); 745 return (0); 746 } 747 748 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN)); 749 pfs = VTOPFS(dvp); 750 switch (pfs->pfs_type) { 751 case Proot: 752 /* 753 * Shouldn't get here with .. in the root node. 754 */ 755 if (cnp->cn_flags & ISDOTDOT) 756 return (EIO); 757 758 iscurproc = CNEQ(cnp, "curproc", 7); 759 isself = CNEQ(cnp, "self", 4); 760 761 if (iscurproc || isself) { 762 error = procfs_allocvp(dvp->v_mount, vpp, 0, 763 iscurproc ? Pcurproc : Pself); 764 if ((error == 0) && (wantpunlock)) { 765 VOP_UNLOCK(dvp, 0); 766 cnp->cn_flags |= PDIRUNLOCK; 767 } 768 return (error); 769 } 770 771 for (i = 0; i < nproc_root_targets; i++) { 772 pt = &proc_root_targets[i]; 773 if (cnp->cn_namelen == pt->pt_namlen && 774 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 775 (pt->pt_valid == NULL || 776 (*pt->pt_valid)(p, dvp->v_mount))) 777 break; 778 } 779 780 if (i != nproc_root_targets) { 781 error = procfs_allocvp(dvp->v_mount, vpp, 0, 782 pt->pt_pfstype); 783 if ((error == 0) && (wantpunlock)) { 784 VOP_UNLOCK(dvp, 0); 785 cnp->cn_flags |= PDIRUNLOCK; 786 } 787 return (error); 788 } 789 790 pid = atopid(pname, cnp->cn_namelen); 791 if (pid == NO_PID) 792 break; 793 794 p = PFIND(pid); 795 if (p == 0) 796 break; 797 798 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc); 799 if ((error == 0) && (wantpunlock)) { 800 VOP_UNLOCK(dvp, 0); 801 cnp->cn_flags |= PDIRUNLOCK; 802 } 803 return (error); 804 805 case Pproc: 806 /* 807 * do the .. dance. We unlock the directory, and then 808 * get the root dir. That will automatically return .. 809 * locked. Then if the caller wanted dvp locked, we 810 * re-lock. 811 */ 812 if (cnp->cn_flags & ISDOTDOT) { 813 VOP_UNLOCK(dvp, 0); 814 cnp->cn_flags |= PDIRUNLOCK; 815 error = procfs_root(dvp->v_mount, vpp); 816 if ((error == 0) && (wantpunlock == 0) && 817 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0)) 818 cnp->cn_flags &= ~PDIRUNLOCK; 819 return (error); 820 } 821 822 p = PFIND(pfs->pfs_pid); 823 if (p == 0) 824 break; 825 826 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 827 if (cnp->cn_namelen == pt->pt_namlen && 828 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 829 (pt->pt_valid == NULL || 830 (*pt->pt_valid)(p, dvp->v_mount))) 831 goto found; 832 } 833 break; 834 835 found: 836 if (pt->pt_pfstype == Pfile) { 837 fvp = p->p_textvp; 838 /* We already checked that it exists. */ 839 VREF(fvp); 840 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); 841 if (wantpunlock) { 842 VOP_UNLOCK(dvp, 0); 843 cnp->cn_flags |= PDIRUNLOCK; 844 } 845 *vpp = fvp; 846 return (0); 847 } 848 849 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 850 pt->pt_pfstype); 851 if ((error == 0) && (wantpunlock)) { 852 VOP_UNLOCK(dvp, 0); 853 cnp->cn_flags |= PDIRUNLOCK; 854 } 855 return (error); 856 857 default: 858 return (ENOTDIR); 859 } 860 861 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); 862 } 863 864 int 865 procfs_validfile(p, mp) 866 struct proc *p; 867 struct mount *mp; 868 { 869 return (p->p_textvp != NULL); 870 } 871 872 static int 873 procfs_validfile_linux(p, mp) 874 struct proc *p; 875 struct mount *mp; 876 { 877 int flags; 878 879 flags = VFSTOPROC(mp)->pmnt_flags; 880 return ((flags & PROCFSMNT_LINUXCOMPAT) && 881 (p == NULL || procfs_validfile(p, mp))); 882 } 883 884 /* 885 * readdir returns directory entries from pfsnode (vp). 886 * 887 * the strategy here with procfs is to generate a single 888 * directory entry at a time (struct dirent) and then 889 * copy that out to userland using uiomove. a more efficent 890 * though more complex implementation, would try to minimize 891 * the number of calls to uiomove(). for procfs, this is 892 * hardly worth the added code complexity. 893 * 894 * this should just be done through read() 895 */ 896 int 897 procfs_readdir(v) 898 void *v; 899 { 900 struct vop_readdir_args /* { 901 struct vnode *a_vp; 902 struct uio *a_uio; 903 struct ucred *a_cred; 904 int *a_eofflag; 905 off_t **a_cookies; 906 int *a_ncookies; 907 } */ *ap = v; 908 struct uio *uio = ap->a_uio; 909 struct dirent d; 910 struct pfsnode *pfs; 911 off_t i; 912 int error; 913 off_t *cookies = NULL; 914 int ncookies, left, skip, j; 915 struct vnode *vp; 916 const struct proc_target *pt; 917 918 vp = ap->a_vp; 919 pfs = VTOPFS(vp); 920 921 if (uio->uio_resid < UIO_MX) 922 return (EINVAL); 923 if (uio->uio_offset < 0) 924 return (EINVAL); 925 926 error = 0; 927 i = uio->uio_offset; 928 memset((caddr_t)&d, 0, UIO_MX); 929 d.d_reclen = UIO_MX; 930 ncookies = uio->uio_resid / UIO_MX; 931 932 switch (pfs->pfs_type) { 933 /* 934 * this is for the process-specific sub-directories. 935 * all that is needed to is copy out all the entries 936 * from the procent[] table (top of this file). 937 */ 938 case Pproc: { 939 struct proc *p; 940 941 if (i >= nproc_targets) 942 return 0; 943 944 p = PFIND(pfs->pfs_pid); 945 if (p == NULL) 946 break; 947 948 if (ap->a_ncookies) { 949 ncookies = min(ncookies, (nproc_targets - i)); 950 cookies = malloc(ncookies * sizeof (off_t), 951 M_TEMP, M_WAITOK); 952 *ap->a_cookies = cookies; 953 } 954 955 for (pt = &proc_targets[i]; 956 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 957 if (pt->pt_valid && 958 (*pt->pt_valid)(p, vp->v_mount) == 0) 959 continue; 960 961 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype); 962 d.d_namlen = pt->pt_namlen; 963 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 964 d.d_type = pt->pt_type; 965 966 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 967 break; 968 if (cookies) 969 *cookies++ = i + 1; 970 } 971 972 break; 973 } 974 975 /* 976 * this is for the root of the procfs filesystem 977 * what is needed are special entries for "curproc" 978 * and "self" followed by an entry for each process 979 * on allproc 980 #ifdef PROCFS_ZOMBIE 981 * and deadproc and zombproc. 982 #endif 983 */ 984 985 case Proot: { 986 int pcnt = i, nc = 0; 987 const struct proclist_desc *pd; 988 volatile struct proc *p; 989 990 if (pcnt > 3) 991 pcnt = 3; 992 if (ap->a_ncookies) { 993 /* 994 * XXX Potentially allocating too much space here, 995 * but I'm lazy. This loop needs some work. 996 */ 997 cookies = malloc(ncookies * sizeof (off_t), 998 M_TEMP, M_WAITOK); 999 *ap->a_cookies = cookies; 1000 } 1001 /* 1002 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST 1003 * PROCLIST IN THE proclists! 1004 */ 1005 proclist_lock_read(); 1006 pd = proclists; 1007 #ifdef PROCFS_ZOMBIE 1008 again: 1009 #endif 1010 for (p = LIST_FIRST(pd->pd_list); 1011 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) { 1012 switch (i) { 1013 case 0: /* `.' */ 1014 case 1: /* `..' */ 1015 d.d_fileno = PROCFS_FILENO(0, Proot); 1016 d.d_namlen = i + 1; 1017 memcpy(d.d_name, "..", d.d_namlen); 1018 d.d_name[i + 1] = '\0'; 1019 d.d_type = DT_DIR; 1020 break; 1021 1022 case 2: 1023 d.d_fileno = PROCFS_FILENO(0, Pcurproc); 1024 d.d_namlen = sizeof("curproc") - 1; 1025 memcpy(d.d_name, "curproc", sizeof("curproc")); 1026 d.d_type = DT_LNK; 1027 break; 1028 1029 case 3: 1030 d.d_fileno = PROCFS_FILENO(0, Pself); 1031 d.d_namlen = sizeof("self") - 1; 1032 memcpy(d.d_name, "self", sizeof("self")); 1033 d.d_type = DT_LNK; 1034 break; 1035 1036 default: 1037 while (pcnt < i) { 1038 pcnt++; 1039 p = LIST_NEXT(p, p_list); 1040 if (!p) 1041 goto done; 1042 } 1043 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc); 1044 d.d_namlen = sprintf(d.d_name, "%ld", 1045 (long)p->p_pid); 1046 d.d_type = DT_DIR; 1047 p = p->p_list.le_next; 1048 break; 1049 } 1050 1051 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1052 break; 1053 nc++; 1054 if (cookies) 1055 *cookies++ = i + 1; 1056 } 1057 done: 1058 1059 #ifdef PROCFS_ZOMBIE 1060 pd++; 1061 if (p == NULL && pd->pd_list != NULL) 1062 goto again; 1063 #endif 1064 proclist_unlock_read(); 1065 1066 skip = i - pcnt; 1067 if (skip >= nproc_root_targets) 1068 break; 1069 left = nproc_root_targets - skip; 1070 for (j = 0, pt = &proc_root_targets[0]; 1071 uio->uio_resid >= UIO_MX && j < left; 1072 pt++, j++, i++) { 1073 if (pt->pt_valid && 1074 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1075 continue; 1076 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype); 1077 d.d_namlen = pt->pt_namlen; 1078 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1079 d.d_type = pt->pt_type; 1080 1081 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1082 break; 1083 nc++; 1084 if (cookies) 1085 *cookies++ = i + 1; 1086 } 1087 1088 ncookies = nc; 1089 break; 1090 } 1091 1092 default: 1093 error = ENOTDIR; 1094 break; 1095 } 1096 1097 if (ap->a_ncookies) { 1098 if (error) { 1099 if (cookies) 1100 free(*ap->a_cookies, M_TEMP); 1101 *ap->a_ncookies = 0; 1102 *ap->a_cookies = NULL; 1103 } else 1104 *ap->a_ncookies = ncookies; 1105 } 1106 uio->uio_offset = i; 1107 return (error); 1108 } 1109 1110 /* 1111 * readlink reads the link of `curproc' 1112 */ 1113 int 1114 procfs_readlink(v) 1115 void *v; 1116 { 1117 struct vop_readlink_args *ap = v; 1118 char buf[16]; /* should be enough */ 1119 int len; 1120 1121 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc)) 1122 len = sprintf(buf, "%ld", (long)curproc->p_pid); 1123 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself)) 1124 len = sprintf(buf, "%s", "curproc"); 1125 else 1126 return (EINVAL); 1127 1128 return (uiomove((caddr_t)buf, len, ap->a_uio)); 1129 } 1130 1131 /* 1132 * convert decimal ascii to pid_t 1133 */ 1134 static pid_t 1135 atopid(b, len) 1136 const char *b; 1137 u_int len; 1138 { 1139 pid_t p = 0; 1140 1141 while (len--) { 1142 char c = *b++; 1143 if (c < '0' || c > '9') 1144 return (NO_PID); 1145 p = 10 * p + (c - '0'); 1146 if (p > PID_MAX) 1147 return (NO_PID); 1148 } 1149 1150 return (p); 1151 } 1152