1 /* 2 * Copyright (c) 1993 Jan-Simon Pendry 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95 34 * 35 * $FreeBSD: src/sys/miscfs/procfs/procfs_subr.c,v 1.26.2.3 2002/02/18 21:28:04 des Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/sysctl.h> 41 #include <sys/proc.h> 42 #include <sys/mount.h> 43 #include <sys/vnode.h> 44 #include <sys/malloc.h> 45 #include <sys/spinlock.h> 46 47 #include <sys/spinlock2.h> 48 49 #include <vfs/procfs/procfs.h> 50 51 #define PFS_HSIZE 1031 52 53 struct pfshead { 54 struct spinlock spin; 55 struct pfsnode *first; 56 } __cachealign; 57 58 static struct pfshead pfshead[PFS_HSIZE]; 59 static struct lock procfslk = LOCK_INITIALIZER("pvplk", 0, 0); 60 61 MALLOC_DEFINE(M_PROCFS, "procfs", "procfs v_data"); 62 63 #define PFSHASH(pid) &pfshead[((pid) & ~PFS_DEAD) % PFS_HSIZE] 64 65 /* 66 * Allocate a pfsnode/vnode pair. If no error occurs the returned vnode 67 * will be referenced and exclusively locked. 68 * 69 * The pid, pfs_type, and mount point uniquely identify a pfsnode. 70 * The mount point is needed because someone might mount this filesystem 71 * twice. 72 * 73 * All pfsnodes are maintained on a singly-linked list. new nodes are 74 * only allocated when they cannot be found on this list. entries on 75 * the list are removed when the vfs reclaim entry is called. 76 * 77 * A single lock is kept for the entire list. this is needed because the 78 * getnewvnode() function can block waiting for a vnode to become free, 79 * in which case there may be more than one process trying to get the same 80 * vnode. this lock is only taken if we are going to call getnewvnode, 81 * since the kernel itself is single-threaded. 82 * 83 * If an entry is found on the list, then call vget() to take a reference 84 * and obtain the lock. This will properly re-reference the vnode if it 85 * had gotten onto the free list. 86 */ 87 int 88 procfs_allocvp(struct mount *mp, struct vnode **vpp, long pid, pfstype pfs_type) 89 { 90 struct pfsnode *pfs; 91 struct vnode *vp; 92 struct pfshead *ph; 93 int error; 94 95 ph = PFSHASH(pid); 96 loop: 97 spin_lock(&ph->spin); 98 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 99 if (pfs->pfs_pid == pid && pfs->pfs_type == pfs_type && 100 PFSTOV(pfs)->v_mount == mp) { 101 vp = PFSTOV(pfs); 102 vhold(vp); 103 spin_unlock(&ph->spin); 104 if (vget(vp, LK_EXCLUSIVE)) { 105 vdrop(vp); 106 goto loop; 107 } 108 vdrop(vp); 109 110 /* 111 * Make sure the vnode is still in the cache after 112 * getting the interlock to avoid racing a free. 113 */ 114 spin_lock(&ph->spin); 115 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 116 if (PFSTOV(pfs) == vp && 117 pfs->pfs_pid == pid && 118 pfs->pfs_type == pfs_type && 119 PFSTOV(pfs)->v_mount == mp) { 120 break; 121 } 122 } 123 if (pfs == NULL || PFSTOV(pfs) != vp) { 124 spin_unlock(&ph->spin); 125 vput(vp); 126 goto loop; 127 128 } 129 spin_unlock(&ph->spin); 130 *vpp = vp; 131 return (0); 132 } 133 } 134 spin_unlock(&ph->spin); 135 136 /* 137 * otherwise lock the vp list while we call getnewvnode 138 * since that can block. 139 */ 140 if (lockmgr(&procfslk, LK_EXCLUSIVE|LK_SLEEPFAIL)) 141 goto loop; 142 143 /* 144 * Do the MALLOC before the getnewvnode since doing so afterward 145 * might cause a bogus v_data pointer to get dereferenced 146 * elsewhere if MALLOC should block. 147 * 148 * XXX this may not matter anymore since getnewvnode now returns 149 * a VX locked vnode. 150 */ 151 pfs = kmalloc(sizeof(struct pfsnode), M_PROCFS, M_WAITOK); 152 153 error = getnewvnode(VT_PROCFS, mp, vpp, 0, 0); 154 if (error) { 155 kfree(pfs, M_PROCFS); 156 goto out; 157 } 158 vp = *vpp; 159 160 vp->v_data = pfs; 161 162 pfs->pfs_next = 0; 163 pfs->pfs_pid = (pid_t) pid; 164 pfs->pfs_type = pfs_type; 165 pfs->pfs_vnode = vp; 166 pfs->pfs_flags = 0; 167 pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type); 168 lockinit(&pfs->pfs_lock, "pfslk", 0, 0); 169 170 switch (pfs_type) { 171 case Proot: /* /proc = dr-xr-xr-x */ 172 pfs->pfs_mode = (VREAD|VEXEC) | 173 (VREAD|VEXEC) >> 3 | 174 (VREAD|VEXEC) >> 6; 175 vp->v_type = VDIR; 176 vp->v_flag = VROOT; 177 break; 178 179 case Pcurproc: /* /proc/curproc = lr--r--r-- */ 180 pfs->pfs_mode = (VREAD) | 181 (VREAD >> 3) | 182 (VREAD >> 6); 183 vp->v_type = VLNK; 184 break; 185 186 case Pproc: 187 pfs->pfs_mode = (VREAD|VEXEC) | 188 (VREAD|VEXEC) >> 3 | 189 (VREAD|VEXEC) >> 6; 190 vp->v_type = VDIR; 191 break; 192 193 case Pfile: 194 pfs->pfs_mode = (VREAD|VEXEC) | 195 (VREAD|VEXEC) >> 3 | 196 (VREAD|VEXEC) >> 6; 197 vp->v_type = VLNK; 198 break; 199 200 case Pmem: 201 pfs->pfs_mode = (VREAD|VWRITE); 202 vp->v_type = VREG; 203 break; 204 205 case Pregs: 206 case Pfpregs: 207 case Pdbregs: 208 pfs->pfs_mode = (VREAD|VWRITE); 209 vp->v_type = VREG; 210 break; 211 212 case Pctl: 213 case Pnote: 214 case Pnotepg: 215 pfs->pfs_mode = (VWRITE); 216 vp->v_type = VREG; 217 break; 218 219 case Ptype: 220 case Pmap: 221 case Pstatus: 222 case Pcmdline: 223 case Prlimit: 224 pfs->pfs_mode = (VREAD) | 225 (VREAD >> 3) | 226 (VREAD >> 6); 227 vp->v_type = VREG; 228 break; 229 230 default: 231 panic("procfs_allocvp"); 232 } 233 234 /* add to procfs vnode list */ 235 spin_lock(&ph->spin); 236 pfs->pfs_next = ph->first; 237 ph->first = pfs; 238 spin_unlock(&ph->spin); 239 240 out: 241 lockmgr(&procfslk, LK_RELEASE); 242 243 return (error); 244 } 245 246 int 247 procfs_freevp(struct vnode *vp) 248 { 249 struct pfshead *ph; 250 struct pfsnode **pp; 251 struct pfsnode *pfs; 252 253 pfs = VTOPFS(vp); 254 vp->v_data = NULL; 255 ph = PFSHASH(pfs->pfs_pid); 256 257 spin_lock(&ph->spin); 258 pp = &ph->first; 259 while (*pp != pfs) { 260 KKASSERT(*pp != NULL); 261 pp = &(*pp)->pfs_next; 262 } 263 *pp = pfs->pfs_next; 264 spin_unlock(&ph->spin); 265 266 pfs->pfs_next = NULL; 267 pfs->pfs_vnode = NULL; 268 kfree(pfs, M_PROCFS); 269 270 return (0); 271 } 272 273 /* 274 * Try to find the calling pid. Note that pfind() 275 * now references the proc structure to be returned 276 * and needs to be released later with PRELE(). 277 */ 278 struct proc * 279 pfs_pfind(pid_t pfs_pid) 280 { 281 struct proc *p = NULL; 282 283 if (pfs_pid == 0) { 284 p = &proc0; 285 PHOLD(p); 286 } else { 287 p = pfind(pfs_pid); 288 } 289 290 /* 291 * Make sure the process is not in the middle of exiting (where 292 * a lot of its structural members may wind up being NULL). If it 293 * is we give up on it. 294 */ 295 if (p) { 296 lwkt_gettoken(&p->p_token); 297 if (p->p_flags & P_POSTEXIT) { 298 lwkt_reltoken(&p->p_token); 299 PRELE(p); 300 p = NULL; 301 } 302 } 303 return p; 304 } 305 306 struct proc * 307 pfs_zpfind(pid_t pfs_pid) 308 { 309 struct proc *p = NULL; 310 311 if (pfs_pid == 0) { 312 p = &proc0; 313 PHOLD(p); 314 } else { 315 p = zpfind(pfs_pid); 316 } 317 318 /* 319 * Make sure the process is not in the middle of exiting (where 320 * a lot of its structural members may wind up being NULL). If it 321 * is we give up on it. 322 */ 323 if (p) { 324 lwkt_gettoken(&p->p_token); 325 if (p->p_flags & P_POSTEXIT) { 326 lwkt_reltoken(&p->p_token); 327 PRELE(p); 328 p = NULL; 329 } 330 } 331 return p; 332 } 333 334 void 335 pfs_pdone(struct proc *p) 336 { 337 if (p) { 338 lwkt_reltoken(&p->p_token); 339 PRELE(p); 340 } 341 } 342 343 int 344 procfs_rw(struct vop_read_args *ap) 345 { 346 struct vnode *vp = ap->a_vp; 347 struct uio *uio = ap->a_uio; 348 struct thread *curtd = uio->uio_td; 349 struct proc *curp; 350 struct pfsnode *pfs = VTOPFS(vp); 351 struct proc *p; 352 struct lwp *lp; 353 int rtval; 354 355 if (curtd == NULL) 356 return (EINVAL); 357 if ((curp = curtd->td_proc) == NULL) /* XXX */ 358 return (EINVAL); 359 360 p = pfs_pfind(pfs->pfs_pid); 361 if (p == NULL) { 362 rtval = EINVAL; 363 goto out; 364 } 365 if (p->p_pid == 1 && securelevel > 0 && uio->uio_rw == UIO_WRITE) { 366 rtval = EACCES; 367 goto out; 368 } 369 /* XXX lwp */ 370 lp = FIRST_LWP_IN_PROC(p); 371 LWPHOLD(lp); 372 373 lockmgr(&pfs->pfs_lock, LK_EXCLUSIVE); 374 375 switch (pfs->pfs_type) { 376 case Pnote: 377 case Pnotepg: 378 rtval = procfs_donote(curp, lp, pfs, uio); 379 break; 380 381 case Pregs: 382 rtval = procfs_doregs(curp, lp, pfs, uio); 383 break; 384 385 case Pfpregs: 386 rtval = procfs_dofpregs(curp, lp, pfs, uio); 387 break; 388 389 case Pdbregs: 390 rtval = procfs_dodbregs(curp, lp, pfs, uio); 391 break; 392 393 case Pctl: 394 rtval = procfs_doctl(curp, lp, pfs, uio); 395 break; 396 397 case Pstatus: 398 rtval = procfs_dostatus(curp, lp, pfs, uio); 399 break; 400 401 case Pmap: 402 rtval = procfs_domap(curp, lp, pfs, uio); 403 break; 404 405 case Pmem: 406 rtval = procfs_domem(curp, lp, pfs, uio); 407 break; 408 409 case Ptype: 410 rtval = procfs_dotype(curp, lp, pfs, uio); 411 break; 412 413 case Pcmdline: 414 rtval = procfs_docmdline(curp, lp, pfs, uio); 415 break; 416 417 case Prlimit: 418 rtval = procfs_dorlimit(curp, lp, pfs, uio); 419 break; 420 421 default: 422 rtval = EOPNOTSUPP; 423 break; 424 } 425 LWPRELE(lp); 426 427 lockmgr(&pfs->pfs_lock, LK_RELEASE); 428 out: 429 pfs_pdone(p); 430 431 return rtval; 432 } 433 434 /* 435 * Get a string from userland into (buf). Strip a trailing 436 * nl character (to allow easy access from the shell). 437 * The buffer should be *buflenp + 1 chars long. vfs_getuserstr 438 * will automatically add a nul char at the end. 439 * 440 * Returns 0 on success or the following errors 441 * 442 * EINVAL: file offset is non-zero. 443 * EMSGSIZE: message is longer than kernel buffer 444 * EFAULT: user i/o buffer is not addressable 445 */ 446 int 447 vfs_getuserstr(struct uio *uio, char *buf, int *buflenp) 448 { 449 int xlen; 450 int error; 451 452 if (uio->uio_offset != 0) 453 return (EINVAL); 454 455 xlen = *buflenp; 456 457 /* must be able to read the whole string in one go */ 458 if (xlen < uio->uio_resid) 459 return (EMSGSIZE); 460 xlen = uio->uio_resid; 461 462 if ((error = uiomove(buf, xlen, uio)) != 0) 463 return (error); 464 465 /* allow multiple writes without seeks */ 466 uio->uio_offset = 0; 467 468 /* cleanup string and remove trailing newline */ 469 buf[xlen] = '\0'; 470 xlen = strlen(buf); 471 if (xlen > 0 && buf[xlen-1] == '\n') 472 buf[--xlen] = '\0'; 473 *buflenp = xlen; 474 475 return (0); 476 } 477 478 vfs_namemap_t * 479 vfs_findname(vfs_namemap_t *nm, char *buf, int buflen) 480 { 481 482 for (; nm->nm_name; nm++) 483 if (bcmp(buf, nm->nm_name, buflen+1) == 0) 484 return (nm); 485 486 return (0); 487 } 488 489 void 490 procfs_exit(struct thread *td) 491 { 492 struct pfshead *ph; 493 struct pfsnode *pfs; 494 struct vnode *vp; 495 pid_t pid; 496 497 KKASSERT(td->td_proc); 498 pid = td->td_proc->p_pid; 499 500 /* 501 * NOTE: We can't just vgone() the vnode any more, not while 502 * it may potentially still be active. This will clean 503 * the vp and clear the mount and cause the new VOP subsystem 504 * to assert or panic when someone tries to do an operation 505 * on an open (exited) procfs descriptor. 506 * 507 * Prevent further operations on this pid by setting pfs_pid to -1. 508 * Note that a pfs_pid of 0 is used for nodes which do not track 509 * any particular pid. 510 * 511 * Use vx_get() to properly ref/lock a vp which may not have any 512 * refs and which may or may not already be reclaimed. vx_put() 513 * will then properly deactivate it and cause it to be recycled. 514 * 515 * The hash table can also get ripped out from under us when 516 * we block so take the easy way out and restart the scan. 517 */ 518 for (;;) { 519 ph = PFSHASH(pid); 520 spin_lock(&ph->spin); 521 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 522 if (pfs->pfs_pid == pid) 523 break; 524 } 525 if (pfs == NULL) { 526 spin_unlock(&ph->spin); 527 break; 528 } 529 vp = PFSTOV(pfs); 530 vhold(vp); 531 spin_unlock(&ph->spin); 532 vx_get(vp); 533 pfs->pfs_pid |= PFS_DEAD; /* does not effect hash */ 534 vx_put(vp); 535 vdrop(vp); 536 } 537 } 538