1 /* 2 * Copyright (c) 1993 Jan-Simon Pendry 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95 34 * 35 * $FreeBSD: src/sys/miscfs/procfs/procfs_subr.c,v 1.26.2.3 2002/02/18 21:28:04 des Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/sysctl.h> 41 #include <sys/uio.h> 42 #include <sys/proc.h> 43 #include <sys/mount.h> 44 #include <sys/vnode.h> 45 #include <sys/malloc.h> 46 #include <sys/spinlock.h> 47 48 #include <sys/spinlock2.h> 49 50 #include <vfs/procfs/procfs.h> 51 52 #define PFS_HSIZE 1031 53 54 struct pfshead { 55 struct spinlock spin; 56 struct pfsnode *first; 57 } __cachealign; 58 59 static struct pfshead pfshead[PFS_HSIZE]; 60 static struct lock procfslk = LOCK_INITIALIZER("pvplk", 0, 0); 61 62 MALLOC_DEFINE(M_PROCFS, "procfs", "procfs v_data"); 63 64 #define PFSHASH(pid) &pfshead[((pid) & ~PFS_DEAD) % PFS_HSIZE] 65 66 /* 67 * Allocate a pfsnode/vnode pair. If no error occurs the returned vnode 68 * will be referenced and exclusively locked. 69 * 70 * The pid, pfs_type, and mount point uniquely identify a pfsnode. 71 * The mount point is needed because someone might mount this filesystem 72 * twice. 73 * 74 * All pfsnodes are maintained on a singly-linked list. new nodes are 75 * only allocated when they cannot be found on this list. entries on 76 * the list are removed when the vfs reclaim entry is called. 77 * 78 * A single lock is kept for the entire list. this is needed because the 79 * getnewvnode() function can block waiting for a vnode to become free, 80 * in which case there may be more than one process trying to get the same 81 * vnode. this lock is only taken if we are going to call getnewvnode, 82 * since the kernel itself is single-threaded. 83 * 84 * If an entry is found on the list, then call vget() to take a reference 85 * and obtain the lock. This will properly re-reference the vnode if it 86 * had gotten onto the free list. 87 */ 88 int 89 procfs_allocvp(struct mount *mp, struct vnode **vpp, long pid, pfstype pfs_type) 90 { 91 struct pfsnode *pfs; 92 struct vnode *vp; 93 struct pfshead *ph; 94 int error; 95 96 ph = PFSHASH(pid); 97 loop: 98 spin_lock(&ph->spin); 99 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 100 if (pfs->pfs_pid == pid && pfs->pfs_type == pfs_type && 101 PFSTOV(pfs)->v_mount == mp) { 102 vp = PFSTOV(pfs); 103 vhold(vp); 104 spin_unlock(&ph->spin); 105 if (vget(vp, LK_EXCLUSIVE)) { 106 vdrop(vp); 107 goto loop; 108 } 109 vdrop(vp); 110 111 /* 112 * Make sure the vnode is still in the cache after 113 * getting the interlock to avoid racing a free. 114 */ 115 spin_lock(&ph->spin); 116 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 117 if (PFSTOV(pfs) == vp && 118 pfs->pfs_pid == pid && 119 pfs->pfs_type == pfs_type && 120 PFSTOV(pfs)->v_mount == mp) { 121 break; 122 } 123 } 124 if (pfs == NULL || PFSTOV(pfs) != vp) { 125 spin_unlock(&ph->spin); 126 vput(vp); 127 goto loop; 128 129 } 130 spin_unlock(&ph->spin); 131 *vpp = vp; 132 return (0); 133 } 134 } 135 spin_unlock(&ph->spin); 136 137 /* 138 * otherwise lock the vp list while we call getnewvnode 139 * since that can block. 140 */ 141 if (lockmgr(&procfslk, LK_EXCLUSIVE|LK_SLEEPFAIL)) 142 goto loop; 143 144 /* 145 * Do the MALLOC before the getnewvnode since doing so afterward 146 * might cause a bogus v_data pointer to get dereferenced 147 * elsewhere if MALLOC should block. 148 * 149 * XXX this may not matter anymore since getnewvnode now returns 150 * a VX locked vnode. 151 */ 152 pfs = kmalloc(sizeof(struct pfsnode), M_PROCFS, M_WAITOK); 153 154 error = getnewvnode(VT_PROCFS, mp, vpp, 0, 0); 155 if (error) { 156 kfree(pfs, M_PROCFS); 157 goto out; 158 } 159 vp = *vpp; 160 161 vp->v_data = pfs; 162 163 pfs->pfs_next = 0; 164 pfs->pfs_pid = (pid_t) pid; 165 pfs->pfs_type = pfs_type; 166 pfs->pfs_vnode = vp; 167 pfs->pfs_flags = 0; 168 pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type); 169 lockinit(&pfs->pfs_lock, "pfslk", 0, 0); 170 171 switch (pfs_type) { 172 case Proot: /* /proc = dr-xr-xr-x */ 173 pfs->pfs_mode = (VREAD|VEXEC) | 174 (VREAD|VEXEC) >> 3 | 175 (VREAD|VEXEC) >> 6; 176 vp->v_type = VDIR; 177 vp->v_flag = VROOT; 178 break; 179 180 case Pcurproc: /* /proc/curproc = lr--r--r-- */ 181 pfs->pfs_mode = (VREAD) | 182 (VREAD >> 3) | 183 (VREAD >> 6); 184 vp->v_type = VLNK; 185 break; 186 187 case Pproc: 188 pfs->pfs_mode = (VREAD|VEXEC) | 189 (VREAD|VEXEC) >> 3 | 190 (VREAD|VEXEC) >> 6; 191 vp->v_type = VDIR; 192 break; 193 194 case Pfile: 195 pfs->pfs_mode = (VREAD|VEXEC) | 196 (VREAD|VEXEC) >> 3 | 197 (VREAD|VEXEC) >> 6; 198 vp->v_type = VLNK; 199 break; 200 201 case Pmem: 202 pfs->pfs_mode = (VREAD|VWRITE); 203 vp->v_type = VREG; 204 break; 205 206 case Pregs: 207 case Pfpregs: 208 case Pdbregs: 209 pfs->pfs_mode = (VREAD|VWRITE); 210 vp->v_type = VREG; 211 break; 212 213 case Pctl: 214 case Pnote: 215 case Pnotepg: 216 pfs->pfs_mode = (VWRITE); 217 vp->v_type = VREG; 218 break; 219 220 case Ptype: 221 case Pmap: 222 case Pstatus: 223 case Pcmdline: 224 case Prlimit: 225 pfs->pfs_mode = (VREAD) | 226 (VREAD >> 3) | 227 (VREAD >> 6); 228 vp->v_type = VREG; 229 break; 230 231 default: 232 panic("procfs_allocvp"); 233 } 234 235 /* add to procfs vnode list */ 236 spin_lock(&ph->spin); 237 pfs->pfs_next = ph->first; 238 ph->first = pfs; 239 spin_unlock(&ph->spin); 240 241 out: 242 lockmgr(&procfslk, LK_RELEASE); 243 244 return (error); 245 } 246 247 int 248 procfs_freevp(struct vnode *vp) 249 { 250 struct pfshead *ph; 251 struct pfsnode **pp; 252 struct pfsnode *pfs; 253 254 pfs = VTOPFS(vp); 255 vp->v_data = NULL; 256 ph = PFSHASH(pfs->pfs_pid); 257 258 spin_lock(&ph->spin); 259 pp = &ph->first; 260 while (*pp != pfs) { 261 KKASSERT(*pp != NULL); 262 pp = &(*pp)->pfs_next; 263 } 264 *pp = pfs->pfs_next; 265 spin_unlock(&ph->spin); 266 267 pfs->pfs_next = NULL; 268 pfs->pfs_vnode = NULL; 269 kfree(pfs, M_PROCFS); 270 271 return (0); 272 } 273 274 /* 275 * Try to find the calling pid. Note that pfind() 276 * now references the proc structure to be returned 277 * and needs to be released later with PRELE(). 278 */ 279 struct proc * 280 pfs_pfind(pid_t pfs_pid) 281 { 282 struct proc *p = NULL; 283 284 if (pfs_pid == 0) { 285 p = &proc0; 286 PHOLD(p); 287 } else { 288 p = pfind(pfs_pid); 289 } 290 291 /* 292 * Make sure the process is not in the middle of exiting (where 293 * a lot of its structural members may wind up being NULL). If it 294 * is we give up on it. 295 */ 296 if (p) { 297 lwkt_gettoken(&p->p_token); 298 if (p->p_flags & P_POSTEXIT) { 299 lwkt_reltoken(&p->p_token); 300 PRELE(p); 301 p = NULL; 302 } 303 } 304 return p; 305 } 306 307 struct proc * 308 pfs_zpfind(pid_t pfs_pid) 309 { 310 struct proc *p = NULL; 311 312 if (pfs_pid == 0) { 313 p = &proc0; 314 PHOLD(p); 315 } else { 316 p = zpfind(pfs_pid); 317 } 318 319 /* 320 * Make sure the process is not in the middle of exiting (where 321 * a lot of its structural members may wind up being NULL). If it 322 * is we give up on it. 323 */ 324 if (p) { 325 lwkt_gettoken(&p->p_token); 326 if (p->p_flags & P_POSTEXIT) { 327 lwkt_reltoken(&p->p_token); 328 PRELE(p); 329 p = NULL; 330 } 331 } 332 return p; 333 } 334 335 void 336 pfs_pdone(struct proc *p) 337 { 338 if (p) { 339 lwkt_reltoken(&p->p_token); 340 PRELE(p); 341 } 342 } 343 344 int 345 procfs_rw(struct vop_read_args *ap) 346 { 347 struct vnode *vp = ap->a_vp; 348 struct uio *uio = ap->a_uio; 349 struct thread *curtd = uio->uio_td; 350 struct proc *curp; 351 struct pfsnode *pfs = VTOPFS(vp); 352 struct proc *p; 353 struct lwp *lp; 354 int rtval; 355 356 if (curtd == NULL) 357 return (EINVAL); 358 if ((curp = curtd->td_proc) == NULL) /* XXX */ 359 return (EINVAL); 360 361 p = pfs_pfind(pfs->pfs_pid); 362 if (p == NULL) { 363 rtval = EINVAL; 364 goto out; 365 } 366 if (p->p_pid == 1 && securelevel > 0 && uio->uio_rw == UIO_WRITE) { 367 rtval = EACCES; 368 goto out; 369 } 370 371 /* 372 * XXX lwp 373 */ 374 lp = FIRST_LWP_IN_PROC(p); 375 if (lp == NULL) { 376 rtval = EINVAL; 377 goto out; 378 } 379 LWPHOLD(lp); 380 381 lockmgr(&pfs->pfs_lock, LK_EXCLUSIVE); 382 383 switch (pfs->pfs_type) { 384 case Pnote: 385 case Pnotepg: 386 rtval = procfs_donote(curp, lp, pfs, uio); 387 break; 388 389 case Pregs: 390 rtval = procfs_doregs(curp, lp, pfs, uio); 391 break; 392 393 case Pfpregs: 394 rtval = procfs_dofpregs(curp, lp, pfs, uio); 395 break; 396 397 case Pdbregs: 398 rtval = procfs_dodbregs(curp, lp, pfs, uio); 399 break; 400 401 case Pctl: 402 rtval = procfs_doctl(curp, lp, pfs, uio); 403 break; 404 405 case Pstatus: 406 rtval = procfs_dostatus(curp, lp, pfs, uio); 407 break; 408 409 case Pmap: 410 rtval = procfs_domap(curp, lp, pfs, uio); 411 break; 412 413 case Pmem: 414 rtval = procfs_domem(curp, lp, pfs, uio); 415 break; 416 417 case Ptype: 418 rtval = procfs_dotype(curp, lp, pfs, uio); 419 break; 420 421 case Pcmdline: 422 rtval = procfs_docmdline(curp, lp, pfs, uio); 423 break; 424 425 case Prlimit: 426 rtval = procfs_dorlimit(curp, lp, pfs, uio); 427 break; 428 429 default: 430 rtval = EOPNOTSUPP; 431 break; 432 } 433 LWPRELE(lp); 434 435 lockmgr(&pfs->pfs_lock, LK_RELEASE); 436 out: 437 pfs_pdone(p); 438 439 return rtval; 440 } 441 442 /* 443 * Get a string from userland into (buf). Strip a trailing 444 * nl character (to allow easy access from the shell). 445 * The buffer should be *buflenp + 1 chars long. vfs_getuserstr 446 * will automatically add a nul char at the end. 447 * 448 * Returns 0 on success or the following errors 449 * 450 * EINVAL: file offset is non-zero. 451 * EMSGSIZE: message is longer than kernel buffer 452 * EFAULT: user i/o buffer is not addressable 453 */ 454 int 455 vfs_getuserstr(struct uio *uio, char *buf, int *buflenp) 456 { 457 int xlen; 458 int error; 459 460 if (uio->uio_offset != 0) 461 return (EINVAL); 462 463 xlen = *buflenp; 464 465 /* must be able to read the whole string in one go */ 466 if (xlen < uio->uio_resid) 467 return (EMSGSIZE); 468 xlen = uio->uio_resid; 469 470 if ((error = uiomove(buf, xlen, uio)) != 0) 471 return (error); 472 473 /* allow multiple writes without seeks */ 474 uio->uio_offset = 0; 475 476 /* cleanup string and remove trailing newline */ 477 buf[xlen] = '\0'; 478 xlen = strlen(buf); 479 if (xlen > 0 && buf[xlen-1] == '\n') 480 buf[--xlen] = '\0'; 481 *buflenp = xlen; 482 483 return (0); 484 } 485 486 vfs_namemap_t * 487 vfs_findname(vfs_namemap_t *nm, char *buf, int buflen) 488 { 489 490 for (; nm->nm_name; nm++) 491 if (bcmp(buf, nm->nm_name, buflen+1) == 0) 492 return (nm); 493 494 return (0); 495 } 496 497 void 498 procfs_exit(struct thread *td) 499 { 500 struct pfshead *ph; 501 struct pfsnode *pfs; 502 struct vnode *vp; 503 pid_t pid; 504 505 KKASSERT(td->td_proc); 506 pid = td->td_proc->p_pid; 507 508 /* 509 * NOTE: We can't just vgone() the vnode any more, not while 510 * it may potentially still be active. This will clean 511 * the vp and clear the mount and cause the new VOP subsystem 512 * to assert or panic when someone tries to do an operation 513 * on an open (exited) procfs descriptor. 514 * 515 * Prevent further operations on this pid by setting pfs_pid to -1. 516 * Note that a pfs_pid of 0 is used for nodes which do not track 517 * any particular pid. 518 * 519 * Use vx_get() to properly ref/lock a vp which may not have any 520 * refs and which may or may not already be reclaimed. vx_put() 521 * will then properly deactivate it and cause it to be recycled. 522 * 523 * The hash table can also get ripped out from under us when 524 * we block so take the easy way out and restart the scan. 525 */ 526 for (;;) { 527 ph = PFSHASH(pid); 528 spin_lock(&ph->spin); 529 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 530 if (pfs->pfs_pid == pid) 531 break; 532 } 533 if (pfs == NULL) { 534 spin_unlock(&ph->spin); 535 break; 536 } 537 vp = PFSTOV(pfs); 538 vhold(vp); 539 spin_unlock(&ph->spin); 540 vx_get(vp); 541 pfs->pfs_pid |= PFS_DEAD; /* does not effect hash */ 542 vx_put(vp); 543 vdrop(vp); 544 } 545 } 546