1 /* 2 * Copyright (c) 1993 Jan-Simon Pendry 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95 34 * 35 * $FreeBSD: src/sys/miscfs/procfs/procfs_subr.c,v 1.26.2.3 2002/02/18 21:28:04 des Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/sysctl.h> 41 #include <sys/proc.h> 42 #include <sys/mount.h> 43 #include <sys/vnode.h> 44 #include <sys/malloc.h> 45 #include <sys/thread2.h> 46 47 #include <vfs/procfs/procfs.h> 48 49 #define PFS_HSIZE 256 50 #define PFS_HMASK (PFS_HSIZE - 1) 51 52 static struct pfsnode *pfshead[PFS_HSIZE]; 53 static struct lock procfslk = LOCK_INITIALIZER("pvplk", 0, 0); 54 55 #define PFSHASH(pid) &pfshead[(pid) & PFS_HMASK] 56 57 /* 58 * Allocate a pfsnode/vnode pair. If no error occurs the returned vnode 59 * will be referenced and exclusively locked. 60 * 61 * The pid, pfs_type, and mount point uniquely identify a pfsnode. 62 * The mount point is needed because someone might mount this filesystem 63 * twice. 64 * 65 * All pfsnodes are maintained on a singly-linked list. new nodes are 66 * only allocated when they cannot be found on this list. entries on 67 * the list are removed when the vfs reclaim entry is called. 68 * 69 * A single lock is kept for the entire list. this is needed because the 70 * getnewvnode() function can block waiting for a vnode to become free, 71 * in which case there may be more than one process trying to get the same 72 * vnode. this lock is only taken if we are going to call getnewvnode, 73 * since the kernel itself is single-threaded. 74 * 75 * If an entry is found on the list, then call vget() to take a reference 76 * and obtain the lock. This will properly re-reference the vnode if it 77 * had gotten onto the free list. 78 */ 79 int 80 procfs_allocvp(struct mount *mp, struct vnode **vpp, long pid, pfstype pfs_type) 81 { 82 struct pfsnode *pfs; 83 struct vnode *vp; 84 struct pfsnode **pp; 85 int error; 86 87 pp = PFSHASH(pid); 88 loop: 89 for (pfs = *pp; pfs; pfs = pfs->pfs_next) { 90 if (pfs->pfs_pid == pid && pfs->pfs_type == pfs_type && 91 PFSTOV(pfs)->v_mount == mp) { 92 vp = PFSTOV(pfs); 93 vhold(vp); 94 if (vget(vp, LK_EXCLUSIVE)) { 95 vdrop(vp); 96 goto loop; 97 } 98 99 /* 100 * Make sure the vnode is still in the cache after 101 * getting the interlock to avoid racing a free. 102 */ 103 for (pfs = *pp; pfs; pfs = pfs->pfs_next) { 104 if (PFSTOV(pfs) == vp && 105 pfs->pfs_pid == pid && 106 pfs->pfs_type == pfs_type && 107 PFSTOV(pfs)->v_mount == mp) { 108 break; 109 } 110 } 111 vdrop(vp); 112 if (pfs == NULL || PFSTOV(pfs) != vp) { 113 vput(vp); 114 goto loop; 115 116 } 117 KKASSERT(vp->v_data == pfs); 118 *vpp = vp; 119 return (0); 120 } 121 } 122 123 /* 124 * otherwise lock the vp list while we call getnewvnode 125 * since that can block. 126 */ 127 if (lockmgr(&procfslk, LK_EXCLUSIVE|LK_SLEEPFAIL)) 128 goto loop; 129 130 /* 131 * Do the MALLOC before the getnewvnode since doing so afterward 132 * might cause a bogus v_data pointer to get dereferenced 133 * elsewhere if MALLOC should block. 134 * 135 * XXX this may not matter anymore since getnewvnode now returns 136 * a VX locked vnode. 137 */ 138 pfs = kmalloc(sizeof(struct pfsnode), M_TEMP, M_WAITOK); 139 140 error = getnewvnode(VT_PROCFS, mp, vpp, 0, 0); 141 if (error) { 142 kfree(pfs, M_TEMP); 143 goto out; 144 } 145 vp = *vpp; 146 147 vp->v_data = pfs; 148 149 pfs->pfs_next = 0; 150 pfs->pfs_pid = (pid_t) pid; 151 pfs->pfs_type = pfs_type; 152 pfs->pfs_vnode = vp; 153 pfs->pfs_flags = 0; 154 pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type); 155 lockinit(&pfs->pfs_lock, "pfslk", 0, 0); 156 157 switch (pfs_type) { 158 case Proot: /* /proc = dr-xr-xr-x */ 159 pfs->pfs_mode = (VREAD|VEXEC) | 160 (VREAD|VEXEC) >> 3 | 161 (VREAD|VEXEC) >> 6; 162 vp->v_type = VDIR; 163 vp->v_flag = VROOT; 164 break; 165 166 case Pcurproc: /* /proc/curproc = lr--r--r-- */ 167 pfs->pfs_mode = (VREAD) | 168 (VREAD >> 3) | 169 (VREAD >> 6); 170 vp->v_type = VLNK; 171 break; 172 173 case Pproc: 174 pfs->pfs_mode = (VREAD|VEXEC) | 175 (VREAD|VEXEC) >> 3 | 176 (VREAD|VEXEC) >> 6; 177 vp->v_type = VDIR; 178 break; 179 180 case Pfile: 181 pfs->pfs_mode = (VREAD|VEXEC) | 182 (VREAD|VEXEC) >> 3 | 183 (VREAD|VEXEC) >> 6; 184 vp->v_type = VLNK; 185 break; 186 187 case Pmem: 188 pfs->pfs_mode = (VREAD|VWRITE); 189 vp->v_type = VREG; 190 break; 191 192 case Pregs: 193 case Pfpregs: 194 case Pdbregs: 195 pfs->pfs_mode = (VREAD|VWRITE); 196 vp->v_type = VREG; 197 break; 198 199 case Pctl: 200 case Pnote: 201 case Pnotepg: 202 pfs->pfs_mode = (VWRITE); 203 vp->v_type = VREG; 204 break; 205 206 case Ptype: 207 case Pmap: 208 case Pstatus: 209 case Pcmdline: 210 case Prlimit: 211 pfs->pfs_mode = (VREAD) | 212 (VREAD >> 3) | 213 (VREAD >> 6); 214 vp->v_type = VREG; 215 break; 216 217 default: 218 panic("procfs_allocvp"); 219 } 220 221 /* add to procfs vnode list */ 222 pfs->pfs_next = *pp; 223 *pp = pfs; 224 225 out: 226 lockmgr(&procfslk, LK_RELEASE); 227 228 return (error); 229 } 230 231 int 232 procfs_freevp(struct vnode *vp) 233 { 234 struct pfsnode **pfspp; 235 struct pfsnode *pfs; 236 237 pfs = VTOPFS(vp); 238 vp->v_data = NULL; 239 240 pfspp = PFSHASH(pfs->pfs_pid); 241 while (*pfspp != pfs && *pfspp) 242 pfspp = &(*pfspp)->pfs_next; 243 KKASSERT(*pfspp); 244 *pfspp = pfs->pfs_next; 245 pfs->pfs_next = NULL; 246 pfs->pfs_vnode = NULL; 247 kfree(pfs, M_TEMP); 248 return (0); 249 } 250 251 /* 252 * Try to find the calling pid. Note that pfind() 253 * now references the proc structure to be returned 254 * and needs to be released later with PRELE(). 255 */ 256 struct proc * 257 pfs_pfind(pid_t pfs_pid) 258 { 259 struct proc *p = NULL; 260 261 if (pfs_pid == 0) { 262 p = &proc0; 263 PHOLD(p); 264 } else { 265 p = pfind(pfs_pid); 266 } 267 268 /* 269 * Make sure the process is not in the middle of exiting (where 270 * a lot of its structural members may wind up being NULL). If it 271 * is we give up on it. 272 */ 273 if (p) { 274 lwkt_gettoken(&p->p_token); 275 if (p->p_flags & P_POSTEXIT) { 276 lwkt_reltoken(&p->p_token); 277 PRELE(p); 278 p = NULL; 279 } 280 } 281 return p; 282 } 283 284 struct proc * 285 pfs_zpfind(pid_t pfs_pid) 286 { 287 struct proc *p = NULL; 288 289 if (pfs_pid == 0) { 290 p = &proc0; 291 PHOLD(p); 292 } else { 293 p = zpfind(pfs_pid); 294 } 295 296 /* 297 * Make sure the process is not in the middle of exiting (where 298 * a lot of its structural members may wind up being NULL). If it 299 * is we give up on it. 300 */ 301 if (p) { 302 lwkt_gettoken(&p->p_token); 303 if (p->p_flags & P_POSTEXIT) { 304 lwkt_reltoken(&p->p_token); 305 PRELE(p); 306 p = NULL; 307 } 308 } 309 return p; 310 } 311 312 void 313 pfs_pdone(struct proc *p) 314 { 315 if (p) { 316 lwkt_reltoken(&p->p_token); 317 PRELE(p); 318 } 319 } 320 321 int 322 procfs_rw(struct vop_read_args *ap) 323 { 324 struct vnode *vp = ap->a_vp; 325 struct uio *uio = ap->a_uio; 326 struct thread *curtd = uio->uio_td; 327 struct proc *curp; 328 struct pfsnode *pfs = VTOPFS(vp); 329 struct proc *p; 330 struct lwp *lp; 331 int rtval; 332 333 if (curtd == NULL) 334 return (EINVAL); 335 if ((curp = curtd->td_proc) == NULL) /* XXX */ 336 return (EINVAL); 337 338 p = pfs_pfind(pfs->pfs_pid); 339 if (p == NULL) { 340 rtval = EINVAL; 341 goto out; 342 } 343 if (p->p_pid == 1 && securelevel > 0 && uio->uio_rw == UIO_WRITE) { 344 rtval = EACCES; 345 goto out; 346 } 347 /* XXX lwp */ 348 lp = FIRST_LWP_IN_PROC(p); 349 LWPHOLD(lp); 350 351 lockmgr(&pfs->pfs_lock, LK_EXCLUSIVE); 352 353 switch (pfs->pfs_type) { 354 case Pnote: 355 case Pnotepg: 356 rtval = procfs_donote(curp, lp, pfs, uio); 357 break; 358 359 case Pregs: 360 rtval = procfs_doregs(curp, lp, pfs, uio); 361 break; 362 363 case Pfpregs: 364 rtval = procfs_dofpregs(curp, lp, pfs, uio); 365 break; 366 367 case Pdbregs: 368 rtval = procfs_dodbregs(curp, lp, pfs, uio); 369 break; 370 371 case Pctl: 372 rtval = procfs_doctl(curp, lp, pfs, uio); 373 break; 374 375 case Pstatus: 376 rtval = procfs_dostatus(curp, lp, pfs, uio); 377 break; 378 379 case Pmap: 380 rtval = procfs_domap(curp, lp, pfs, uio); 381 break; 382 383 case Pmem: 384 rtval = procfs_domem(curp, lp, pfs, uio); 385 break; 386 387 case Ptype: 388 rtval = procfs_dotype(curp, lp, pfs, uio); 389 break; 390 391 case Pcmdline: 392 rtval = procfs_docmdline(curp, lp, pfs, uio); 393 break; 394 395 case Prlimit: 396 rtval = procfs_dorlimit(curp, lp, pfs, uio); 397 break; 398 399 default: 400 rtval = EOPNOTSUPP; 401 break; 402 } 403 LWPRELE(lp); 404 405 lockmgr(&pfs->pfs_lock, LK_RELEASE); 406 out: 407 pfs_pdone(p); 408 409 return rtval; 410 } 411 412 /* 413 * Get a string from userland into (buf). Strip a trailing 414 * nl character (to allow easy access from the shell). 415 * The buffer should be *buflenp + 1 chars long. vfs_getuserstr 416 * will automatically add a nul char at the end. 417 * 418 * Returns 0 on success or the following errors 419 * 420 * EINVAL: file offset is non-zero. 421 * EMSGSIZE: message is longer than kernel buffer 422 * EFAULT: user i/o buffer is not addressable 423 */ 424 int 425 vfs_getuserstr(struct uio *uio, char *buf, int *buflenp) 426 { 427 int xlen; 428 int error; 429 430 if (uio->uio_offset != 0) 431 return (EINVAL); 432 433 xlen = *buflenp; 434 435 /* must be able to read the whole string in one go */ 436 if (xlen < uio->uio_resid) 437 return (EMSGSIZE); 438 xlen = uio->uio_resid; 439 440 if ((error = uiomove(buf, xlen, uio)) != 0) 441 return (error); 442 443 /* allow multiple writes without seeks */ 444 uio->uio_offset = 0; 445 446 /* cleanup string and remove trailing newline */ 447 buf[xlen] = '\0'; 448 xlen = strlen(buf); 449 if (xlen > 0 && buf[xlen-1] == '\n') 450 buf[--xlen] = '\0'; 451 *buflenp = xlen; 452 453 return (0); 454 } 455 456 vfs_namemap_t * 457 vfs_findname(vfs_namemap_t *nm, char *buf, int buflen) 458 { 459 460 for (; nm->nm_name; nm++) 461 if (bcmp(buf, nm->nm_name, buflen+1) == 0) 462 return (nm); 463 464 return (0); 465 } 466 467 void 468 procfs_exit(struct thread *td) 469 { 470 struct pfsnode *pfs; 471 struct vnode *vp; 472 pid_t pid; 473 474 KKASSERT(td->td_proc); 475 pid = td->td_proc->p_pid; 476 477 /* 478 * NOTE: We can't just vgone() the vnode any more, not while 479 * it may potentially still be active. This will clean 480 * the vp and clear the mount and cause the new VOP subsystem 481 * to assert or panic when someone tries to do an operation 482 * on an open (exited) procfs descriptor. 483 * 484 * Prevent further operations on this pid by setting pfs_pid to -1. 485 * Note that a pfs_pid of 0 is used for nodes which do not track 486 * any particular pid. 487 * 488 * Use vx_get() to properly ref/lock a vp which may not have any 489 * refs and which may or may not already be reclaimed. vx_put() 490 * will then properly deactivate it and cause it to be recycled. 491 * 492 * The hash table can also get ripped out from under us when 493 * we block so take the easy way out and restart the scan. 494 */ 495 again: 496 pfs = *PFSHASH(pid); 497 while (pfs) { 498 if (pfs->pfs_pid == pid) { 499 vp = PFSTOV(pfs); 500 vx_get(vp); 501 pfs->pfs_pid |= PFS_DEAD; /* does not effect hash */ 502 vx_put(vp); 503 goto again; 504 } 505 pfs = pfs->pfs_next; 506 } 507 } 508 509