1 /* 2 * Copyright (c) 1993 Jan-Simon Pendry 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95 34 * 35 * $FreeBSD: src/sys/miscfs/procfs/procfs_subr.c,v 1.26.2.3 2002/02/18 21:28:04 des Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/sysctl.h> 41 #include <sys/uio.h> 42 #include <sys/proc.h> 43 #include <sys/mount.h> 44 #include <sys/vnode.h> 45 #include <sys/malloc.h> 46 #include <sys/spinlock.h> 47 48 #include <sys/spinlock2.h> 49 50 #include <vfs/procfs/procfs.h> 51 52 #define PFS_HSIZE 1031 53 54 struct pfshead { 55 struct spinlock spin; 56 struct pfsnode *first; 57 } __cachealign; 58 59 static struct pfshead pfshead[PFS_HSIZE]; 60 static struct lock procfslk = LOCK_INITIALIZER("pvplk", 0, 0); 61 62 MALLOC_DEFINE(M_PROCFS, "procfs", "procfs v_data"); 63 64 #define PFSHASH(pid) &pfshead[((pid) & ~PFS_DEAD) % PFS_HSIZE] 65 66 /* 67 * Allocate a pfsnode/vnode pair. If no error occurs the returned vnode 68 * will be referenced and exclusively locked. 69 * 70 * The pid, pfs_type, and mount point uniquely identify a pfsnode. 71 * The mount point is needed because someone might mount this filesystem 72 * twice. 73 * 74 * All pfsnodes are maintained on a singly-linked list. new nodes are 75 * only allocated when they cannot be found on this list. entries on 76 * the list are removed when the vfs reclaim entry is called. 77 * 78 * A single lock is kept for the entire list. this is needed because the 79 * getnewvnode() function can block waiting for a vnode to become free, 80 * in which case there may be more than one process trying to get the same 81 * vnode. this lock is only taken if we are going to call getnewvnode, 82 * since the kernel itself is single-threaded. 83 * 84 * If an entry is found on the list, then call vget() to take a reference 85 * and obtain the lock. This will properly re-reference the vnode if it 86 * had gotten onto the free list. 87 */ 88 int 89 procfs_allocvp(struct mount *mp, struct vnode **vpp, long pid, pfstype pfs_type) 90 { 91 struct pfsnode *pfs; 92 struct vnode *vp; 93 struct pfshead *ph; 94 int error; 95 96 ph = PFSHASH(pid); 97 loop: 98 spin_lock(&ph->spin); 99 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 100 if (pfs->pfs_pid == pid && pfs->pfs_type == pfs_type && 101 PFSTOV(pfs)->v_mount == mp) { 102 vp = PFSTOV(pfs); 103 vhold(vp); 104 spin_unlock(&ph->spin); 105 if (vget(vp, LK_EXCLUSIVE)) { 106 vdrop(vp); 107 goto loop; 108 } 109 vdrop(vp); 110 111 /* 112 * Make sure the vnode is still in the cache after 113 * getting the interlock to avoid racing a free. 114 */ 115 spin_lock(&ph->spin); 116 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 117 if (PFSTOV(pfs) == vp && 118 pfs->pfs_pid == pid && 119 pfs->pfs_type == pfs_type && 120 PFSTOV(pfs)->v_mount == mp) { 121 break; 122 } 123 } 124 if (pfs == NULL || PFSTOV(pfs) != vp) { 125 spin_unlock(&ph->spin); 126 vput(vp); 127 goto loop; 128 129 } 130 spin_unlock(&ph->spin); 131 *vpp = vp; 132 return (0); 133 } 134 } 135 spin_unlock(&ph->spin); 136 137 /* 138 * otherwise lock the vp list while we call getnewvnode 139 * since that can block. 140 */ 141 if (lockmgr(&procfslk, LK_EXCLUSIVE|LK_SLEEPFAIL)) 142 goto loop; 143 144 /* 145 * Do the MALLOC before the getnewvnode since doing so afterward 146 * might cause a bogus v_data pointer to get dereferenced 147 * elsewhere if MALLOC should block. 148 * 149 * XXX this may not matter anymore since getnewvnode now returns 150 * a VX locked vnode. 151 */ 152 pfs = kmalloc(sizeof(struct pfsnode), M_PROCFS, M_WAITOK); 153 154 error = getnewvnode(VT_PROCFS, mp, vpp, 0, 0); 155 if (error) { 156 kfree(pfs, M_PROCFS); 157 goto out; 158 } 159 vp = *vpp; 160 161 vp->v_data = pfs; 162 163 pfs->pfs_next = 0; 164 pfs->pfs_pid = (pid_t) pid; 165 pfs->pfs_type = pfs_type; 166 pfs->pfs_vnode = vp; 167 pfs->pfs_flags = 0; 168 pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type); 169 lockinit(&pfs->pfs_lock, "pfslk", 0, 0); 170 171 switch (pfs_type) { 172 case Proot: /* /proc = dr-xr-xr-x */ 173 pfs->pfs_mode = (VREAD|VEXEC) | 174 (VREAD|VEXEC) >> 3 | 175 (VREAD|VEXEC) >> 6; 176 vp->v_type = VDIR; 177 vp->v_flag = VROOT; 178 break; 179 180 case Pcurproc: /* /proc/curproc = lr--r--r-- */ 181 pfs->pfs_mode = (VREAD) | 182 (VREAD >> 3) | 183 (VREAD >> 6); 184 vp->v_type = VLNK; 185 break; 186 187 case Pproc: 188 pfs->pfs_mode = (VREAD|VEXEC) | 189 (VREAD|VEXEC) >> 3 | 190 (VREAD|VEXEC) >> 6; 191 vp->v_type = VDIR; 192 break; 193 194 case Pfile: 195 pfs->pfs_mode = (VREAD|VEXEC) | 196 (VREAD|VEXEC) >> 3 | 197 (VREAD|VEXEC) >> 6; 198 vp->v_type = VLNK; 199 break; 200 201 case Pmem: 202 pfs->pfs_mode = (VREAD|VWRITE); 203 vp->v_type = VREG; 204 break; 205 206 case Pregs: 207 case Pfpregs: 208 case Pdbregs: 209 pfs->pfs_mode = (VREAD|VWRITE); 210 vp->v_type = VREG; 211 break; 212 213 case Pctl: 214 case Pnote: 215 case Pnotepg: 216 pfs->pfs_mode = (VWRITE); 217 vp->v_type = VREG; 218 break; 219 220 case Ptype: 221 case Pmap: 222 case Pstatus: 223 case Pcmdline: 224 case Prlimit: 225 pfs->pfs_mode = (VREAD) | 226 (VREAD >> 3) | 227 (VREAD >> 6); 228 vp->v_type = VREG; 229 break; 230 231 default: 232 panic("procfs_allocvp"); 233 } 234 235 /* add to procfs vnode list */ 236 spin_lock(&ph->spin); 237 pfs->pfs_next = ph->first; 238 ph->first = pfs; 239 spin_unlock(&ph->spin); 240 241 out: 242 lockmgr(&procfslk, LK_RELEASE); 243 244 return (error); 245 } 246 247 int 248 procfs_freevp(struct vnode *vp) 249 { 250 struct pfshead *ph; 251 struct pfsnode **pp; 252 struct pfsnode *pfs; 253 254 pfs = VTOPFS(vp); 255 vp->v_data = NULL; 256 ph = PFSHASH(pfs->pfs_pid); 257 258 spin_lock(&ph->spin); 259 pp = &ph->first; 260 while (*pp != pfs) { 261 KKASSERT(*pp != NULL); 262 pp = &(*pp)->pfs_next; 263 } 264 *pp = pfs->pfs_next; 265 spin_unlock(&ph->spin); 266 267 pfs->pfs_next = NULL; 268 pfs->pfs_vnode = NULL; 269 kfree(pfs, M_PROCFS); 270 271 return (0); 272 } 273 274 /* 275 * Try to find the calling pid. Note that pfind() 276 * now references the proc structure to be returned 277 * and needs to be released later with PRELE(). 278 */ 279 struct proc * 280 pfs_pfind(pid_t pfs_pid) 281 { 282 struct proc *p = NULL; 283 284 if (pfs_pid == 0) { 285 p = &proc0; 286 PHOLD(p); 287 } else { 288 p = pfind(pfs_pid); 289 } 290 291 /* 292 * Make sure the process is not in the middle of exiting (where 293 * a lot of its structural members may wind up being NULL). If it 294 * is we give up on it. 295 */ 296 if (p) { 297 lwkt_gettoken(&p->p_token); 298 if (p->p_flags & P_POSTEXIT) { 299 lwkt_reltoken(&p->p_token); 300 PRELE(p); 301 p = NULL; 302 } 303 } 304 return p; 305 } 306 307 struct proc * 308 pfs_zpfind(pid_t pfs_pid) 309 { 310 struct proc *p = NULL; 311 312 if (pfs_pid == 0) { 313 p = &proc0; 314 PHOLD(p); 315 } else { 316 p = zpfind(pfs_pid); 317 } 318 319 /* 320 * Make sure the process is not in the middle of exiting (where 321 * a lot of its structural members may wind up being NULL). If it 322 * is we give up on it. 323 */ 324 if (p) { 325 lwkt_gettoken(&p->p_token); 326 if (p->p_flags & P_POSTEXIT) { 327 lwkt_reltoken(&p->p_token); 328 PRELE(p); 329 p = NULL; 330 } 331 } 332 return p; 333 } 334 335 void 336 pfs_pdone(struct proc *p) 337 { 338 if (p) { 339 lwkt_reltoken(&p->p_token); 340 PRELE(p); 341 } 342 } 343 344 int 345 procfs_rw(struct vop_read_args *ap) 346 { 347 struct vnode *vp = ap->a_vp; 348 struct uio *uio = ap->a_uio; 349 struct thread *curtd = uio->uio_td; 350 struct proc *curp; 351 struct pfsnode *pfs = VTOPFS(vp); 352 struct proc *p; 353 struct lwp *lp; 354 int rtval; 355 356 if (curtd == NULL) 357 return (EINVAL); 358 if ((curp = curtd->td_proc) == NULL) /* XXX */ 359 return (EINVAL); 360 361 p = pfs_pfind(pfs->pfs_pid); 362 if (p == NULL) { 363 rtval = EINVAL; 364 goto out; 365 } 366 if (p->p_pid == 1 && securelevel > 0 && uio->uio_rw == UIO_WRITE) { 367 rtval = EACCES; 368 goto out; 369 } 370 /* XXX lwp */ 371 lp = FIRST_LWP_IN_PROC(p); 372 LWPHOLD(lp); 373 374 lockmgr(&pfs->pfs_lock, LK_EXCLUSIVE); 375 376 switch (pfs->pfs_type) { 377 case Pnote: 378 case Pnotepg: 379 rtval = procfs_donote(curp, lp, pfs, uio); 380 break; 381 382 case Pregs: 383 rtval = procfs_doregs(curp, lp, pfs, uio); 384 break; 385 386 case Pfpregs: 387 rtval = procfs_dofpregs(curp, lp, pfs, uio); 388 break; 389 390 case Pdbregs: 391 rtval = procfs_dodbregs(curp, lp, pfs, uio); 392 break; 393 394 case Pctl: 395 rtval = procfs_doctl(curp, lp, pfs, uio); 396 break; 397 398 case Pstatus: 399 rtval = procfs_dostatus(curp, lp, pfs, uio); 400 break; 401 402 case Pmap: 403 rtval = procfs_domap(curp, lp, pfs, uio); 404 break; 405 406 case Pmem: 407 rtval = procfs_domem(curp, lp, pfs, uio); 408 break; 409 410 case Ptype: 411 rtval = procfs_dotype(curp, lp, pfs, uio); 412 break; 413 414 case Pcmdline: 415 rtval = procfs_docmdline(curp, lp, pfs, uio); 416 break; 417 418 case Prlimit: 419 rtval = procfs_dorlimit(curp, lp, pfs, uio); 420 break; 421 422 default: 423 rtval = EOPNOTSUPP; 424 break; 425 } 426 LWPRELE(lp); 427 428 lockmgr(&pfs->pfs_lock, LK_RELEASE); 429 out: 430 pfs_pdone(p); 431 432 return rtval; 433 } 434 435 /* 436 * Get a string from userland into (buf). Strip a trailing 437 * nl character (to allow easy access from the shell). 438 * The buffer should be *buflenp + 1 chars long. vfs_getuserstr 439 * will automatically add a nul char at the end. 440 * 441 * Returns 0 on success or the following errors 442 * 443 * EINVAL: file offset is non-zero. 444 * EMSGSIZE: message is longer than kernel buffer 445 * EFAULT: user i/o buffer is not addressable 446 */ 447 int 448 vfs_getuserstr(struct uio *uio, char *buf, int *buflenp) 449 { 450 int xlen; 451 int error; 452 453 if (uio->uio_offset != 0) 454 return (EINVAL); 455 456 xlen = *buflenp; 457 458 /* must be able to read the whole string in one go */ 459 if (xlen < uio->uio_resid) 460 return (EMSGSIZE); 461 xlen = uio->uio_resid; 462 463 if ((error = uiomove(buf, xlen, uio)) != 0) 464 return (error); 465 466 /* allow multiple writes without seeks */ 467 uio->uio_offset = 0; 468 469 /* cleanup string and remove trailing newline */ 470 buf[xlen] = '\0'; 471 xlen = strlen(buf); 472 if (xlen > 0 && buf[xlen-1] == '\n') 473 buf[--xlen] = '\0'; 474 *buflenp = xlen; 475 476 return (0); 477 } 478 479 vfs_namemap_t * 480 vfs_findname(vfs_namemap_t *nm, char *buf, int buflen) 481 { 482 483 for (; nm->nm_name; nm++) 484 if (bcmp(buf, nm->nm_name, buflen+1) == 0) 485 return (nm); 486 487 return (0); 488 } 489 490 void 491 procfs_exit(struct thread *td) 492 { 493 struct pfshead *ph; 494 struct pfsnode *pfs; 495 struct vnode *vp; 496 pid_t pid; 497 498 KKASSERT(td->td_proc); 499 pid = td->td_proc->p_pid; 500 501 /* 502 * NOTE: We can't just vgone() the vnode any more, not while 503 * it may potentially still be active. This will clean 504 * the vp and clear the mount and cause the new VOP subsystem 505 * to assert or panic when someone tries to do an operation 506 * on an open (exited) procfs descriptor. 507 * 508 * Prevent further operations on this pid by setting pfs_pid to -1. 509 * Note that a pfs_pid of 0 is used for nodes which do not track 510 * any particular pid. 511 * 512 * Use vx_get() to properly ref/lock a vp which may not have any 513 * refs and which may or may not already be reclaimed. vx_put() 514 * will then properly deactivate it and cause it to be recycled. 515 * 516 * The hash table can also get ripped out from under us when 517 * we block so take the easy way out and restart the scan. 518 */ 519 for (;;) { 520 ph = PFSHASH(pid); 521 spin_lock(&ph->spin); 522 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 523 if (pfs->pfs_pid == pid) 524 break; 525 } 526 if (pfs == NULL) { 527 spin_unlock(&ph->spin); 528 break; 529 } 530 vp = PFSTOV(pfs); 531 vhold(vp); 532 spin_unlock(&ph->spin); 533 vx_get(vp); 534 pfs->pfs_pid |= PFS_DEAD; /* does not effect hash */ 535 vx_put(vp); 536 vdrop(vp); 537 } 538 } 539