1 /* 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 39 * $FreeBSD: src/sys/kern/kern_subr.c,v 1.31.2.2 2002/04/21 08:09:37 bde Exp $ 40 * $DragonFly: src/sys/kern/kern_subr.c,v 1.27 2007/01/29 20:44:02 tgen Exp $ 41 */ 42 43 #include "opt_ddb.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/kernel.h> 48 #include <sys/proc.h> 49 #include <sys/malloc.h> 50 #include <sys/lock.h> 51 #include <sys/resourcevar.h> 52 #include <sys/sysctl.h> 53 #include <sys/uio.h> 54 #include <sys/vnode.h> 55 #include <sys/sfbuf.h> 56 #include <sys/thread2.h> 57 #include <machine/limits.h> 58 59 #include <vm/vm.h> 60 #include <vm/vm_page.h> 61 #include <vm/vm_map.h> 62 63 SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, 64 "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); 65 66 /* 67 * UIO_READ: copy the kernelspace cp to the user or kernelspace UIO 68 * UIO_WRITE: copy the user or kernelspace UIO to the kernelspace cp 69 * 70 * For userspace UIO's, uio_td must be the current thread. 71 * 72 * The syscall interface is responsible for limiting the length to 73 * ssize_t for things like read() or write() which return the bytes 74 * read or written as ssize_t. These functions work with unsigned 75 * lengths. 76 */ 77 int 78 uiomove(caddr_t cp, size_t n, struct uio *uio) 79 { 80 struct iovec *iov; 81 size_t cnt; 82 int error = 0; 83 int save = 0; 84 int baseticks = ticks; 85 86 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 87 ("uiomove: mode")); 88 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 89 ("uiomove proc")); 90 91 if (curproc) { 92 save = curproc->p_flag & P_DEADLKTREAT; 93 curproc->p_flag |= P_DEADLKTREAT; 94 } 95 96 while (n > 0 && uio->uio_resid) { 97 iov = uio->uio_iov; 98 cnt = iov->iov_len; 99 if (cnt == 0) { 100 uio->uio_iov++; 101 uio->uio_iovcnt--; 102 continue; 103 } 104 if (cnt > n) 105 cnt = n; 106 107 switch (uio->uio_segflg) { 108 109 case UIO_USERSPACE: 110 if (ticks - baseticks >= hogticks) { 111 uio_yield(); 112 baseticks = ticks; 113 } 114 if (uio->uio_rw == UIO_READ) 115 error = copyout(cp, iov->iov_base, cnt); 116 else 117 error = copyin(iov->iov_base, cp, cnt); 118 if (error) 119 break; 120 break; 121 122 case UIO_SYSSPACE: 123 if (uio->uio_rw == UIO_READ) 124 bcopy((caddr_t)cp, iov->iov_base, cnt); 125 else 126 bcopy(iov->iov_base, (caddr_t)cp, cnt); 127 break; 128 case UIO_NOCOPY: 129 break; 130 } 131 iov->iov_base = (char *)iov->iov_base + cnt; 132 iov->iov_len -= cnt; 133 uio->uio_resid -= cnt; 134 uio->uio_offset += cnt; 135 cp += cnt; 136 n -= cnt; 137 } 138 if (curproc) 139 curproc->p_flag = (curproc->p_flag & ~P_DEADLKTREAT) | save; 140 return (error); 141 } 142 143 /* 144 * Like uiomove() but copies zero-fill. Only allowed for UIO_READ, 145 * for obvious reasons. 146 */ 147 int 148 uiomovez(size_t n, struct uio *uio) 149 { 150 struct iovec *iov; 151 size_t cnt; 152 int error = 0; 153 154 KASSERT(uio->uio_rw == UIO_READ, ("uiomovez: mode")); 155 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 156 ("uiomove proc")); 157 158 while (n > 0 && uio->uio_resid) { 159 iov = uio->uio_iov; 160 cnt = iov->iov_len; 161 if (cnt == 0) { 162 uio->uio_iov++; 163 uio->uio_iovcnt--; 164 continue; 165 } 166 if (cnt > n) 167 cnt = n; 168 169 switch (uio->uio_segflg) { 170 case UIO_USERSPACE: 171 error = copyout(ZeroPage, iov->iov_base, cnt); 172 if (error) 173 break; 174 break; 175 case UIO_SYSSPACE: 176 bzero(iov->iov_base, cnt); 177 break; 178 case UIO_NOCOPY: 179 break; 180 } 181 iov->iov_base = (char *)iov->iov_base + cnt; 182 iov->iov_len -= cnt; 183 uio->uio_resid -= cnt; 184 uio->uio_offset += cnt; 185 n -= cnt; 186 } 187 return (error); 188 } 189 190 /* 191 * Wrapper for uiomove() that validates the arguments against a known-good 192 * kernel buffer. 193 */ 194 int 195 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio) 196 { 197 size_t offset; 198 199 offset = (size_t)uio->uio_offset; 200 if ((off_t)offset != uio->uio_offset) 201 return (EINVAL); 202 if (buflen == 0 || offset >= buflen) 203 return (0); 204 return (uiomove((char *)buf + offset, buflen - offset, uio)); 205 } 206 207 /* 208 * Give next character to user as result of read. 209 */ 210 int 211 ureadc(int c, struct uio *uio) 212 { 213 struct iovec *iov; 214 char *iov_base; 215 216 again: 217 if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) 218 panic("ureadc"); 219 iov = uio->uio_iov; 220 if (iov->iov_len == 0) { 221 uio->uio_iovcnt--; 222 uio->uio_iov++; 223 goto again; 224 } 225 switch (uio->uio_segflg) { 226 227 case UIO_USERSPACE: 228 if (subyte(iov->iov_base, c) < 0) 229 return (EFAULT); 230 break; 231 232 case UIO_SYSSPACE: 233 iov_base = iov->iov_base; 234 *iov_base = c; 235 iov->iov_base = iov_base; 236 break; 237 238 case UIO_NOCOPY: 239 break; 240 } 241 iov->iov_base = (char *)iov->iov_base + 1; 242 iov->iov_len--; 243 uio->uio_resid--; 244 uio->uio_offset++; 245 return (0); 246 } 247 248 /* 249 * General routine to allocate a hash table. Make the hash table size a 250 * power of 2 greater or equal to the number of elements requested, and 251 * store the masking value in *hashmask. 252 */ 253 void * 254 hashinit(int elements, struct malloc_type *type, u_long *hashmask) 255 { 256 long hashsize; 257 LIST_HEAD(generic, generic) *hashtbl; 258 int i; 259 260 if (elements <= 0) 261 panic("hashinit: bad elements"); 262 for (hashsize = 2; hashsize < elements; hashsize <<= 1) 263 continue; 264 hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 265 for (i = 0; i < hashsize; i++) 266 LIST_INIT(&hashtbl[i]); 267 *hashmask = hashsize - 1; 268 return (hashtbl); 269 } 270 271 static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039, 272 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 273 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; 274 #define NPRIMES (sizeof(primes) / sizeof(primes[0])) 275 276 /* 277 * General routine to allocate a prime number sized hash table. 278 */ 279 void * 280 phashinit(int elements, struct malloc_type *type, u_long *nentries) 281 { 282 long hashsize; 283 LIST_HEAD(generic, generic) *hashtbl; 284 int i; 285 286 if (elements <= 0) 287 panic("phashinit: bad elements"); 288 for (i = 1, hashsize = primes[1]; hashsize <= elements;) { 289 i++; 290 if (i == NPRIMES) 291 break; 292 hashsize = primes[i]; 293 } 294 hashsize = primes[i - 1]; 295 hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 296 for (i = 0; i < hashsize; i++) 297 LIST_INIT(&hashtbl[i]); 298 *nentries = hashsize; 299 return (hashtbl); 300 } 301 302 /* 303 * Copyin an iovec. If the iovec array fits, use the preallocated small 304 * iovec structure. If it is too big, dynamically allocate an iovec array 305 * of sufficient size. 306 * 307 * MPSAFE 308 */ 309 int 310 iovec_copyin(struct iovec *uiov, struct iovec **kiov, struct iovec *siov, 311 size_t iov_cnt, size_t *iov_len) 312 { 313 struct iovec *iovp; 314 int error, i; 315 size_t len; 316 317 if (iov_cnt > UIO_MAXIOV) 318 return EMSGSIZE; 319 if (iov_cnt > UIO_SMALLIOV) { 320 MALLOC(*kiov, struct iovec *, sizeof(struct iovec) * iov_cnt, 321 M_IOV, M_WAITOK); 322 } else { 323 *kiov = siov; 324 } 325 error = copyin(uiov, *kiov, iov_cnt * sizeof(struct iovec)); 326 if (error == 0) { 327 *iov_len = 0; 328 for (i = 0, iovp = *kiov; i < iov_cnt; i++, iovp++) { 329 /* 330 * Check for both *iov_len overflows and out of 331 * range iovp->iov_len's. We limit to the 332 * capabilities of signed integers. 333 * 334 * GCC4 - overflow check opt requires assign/test. 335 */ 336 len = *iov_len + iovp->iov_len; 337 if (len < *iov_len) 338 error = EINVAL; 339 *iov_len = len; 340 } 341 } 342 343 /* 344 * From userland disallow iovec's which exceed the sized size 345 * limit as the system calls return ssize_t. 346 * 347 * NOTE: Internal kernel interfaces can handle the unsigned 348 * limit. 349 */ 350 if (error == 0 && (ssize_t)*iov_len < 0) 351 error = EINVAL; 352 353 if (error) 354 iovec_free(kiov, siov); 355 return (error); 356 } 357 358 359 /* 360 * Copyright (c) 2004 Alan L. Cox <alc@cs.rice.edu> 361 * Copyright (c) 1982, 1986, 1991, 1993 362 * The Regents of the University of California. All rights reserved. 363 * (c) UNIX System Laboratories, Inc. 364 * All or some portions of this file are derived from material licensed 365 * to the University of California by American Telephone and Telegraph 366 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 367 * the permission of UNIX System Laboratories, Inc. 368 * 369 * Redistribution and use in source and binary forms, with or without 370 * modification, are permitted provided that the following conditions 371 * are met: 372 * 1. Redistributions of source code must retain the above copyright 373 * notice, this list of conditions and the following disclaimer. 374 * 2. Redistributions in binary form must reproduce the above copyright 375 * notice, this list of conditions and the following disclaimer in the 376 * documentation and/or other materials provided with the distribution. 377 * 4. Neither the name of the University nor the names of its contributors 378 * may be used to endorse or promote products derived from this software 379 * without specific prior written permission. 380 * 381 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 382 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 383 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 384 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 385 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 386 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 387 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 388 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 389 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 390 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 391 * SUCH DAMAGE. 392 * 393 * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 394 * $FreeBSD: src/sys/i386/i386/uio_machdep.c,v 1.1 2004/03/21 20:28:36 alc Exp $ 395 */ 396 397 /* 398 * Implement uiomove(9) from physical memory using sf_bufs to reduce 399 * the creation and destruction of ephemeral mappings. 400 */ 401 int 402 uiomove_fromphys(vm_page_t *ma, vm_offset_t offset, size_t n, struct uio *uio) 403 { 404 struct sf_buf *sf; 405 struct thread *td = curthread; 406 struct iovec *iov; 407 void *cp; 408 vm_offset_t page_offset; 409 vm_page_t m; 410 size_t cnt; 411 int error = 0; 412 int save = 0; 413 414 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 415 ("uiomove_fromphys: mode")); 416 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 417 ("uiomove_fromphys proc")); 418 419 crit_enter(); 420 save = td->td_flags & TDF_DEADLKTREAT; 421 td->td_flags |= TDF_DEADLKTREAT; 422 crit_exit(); 423 424 while (n > 0 && uio->uio_resid) { 425 iov = uio->uio_iov; 426 cnt = iov->iov_len; 427 if (cnt == 0) { 428 uio->uio_iov++; 429 uio->uio_iovcnt--; 430 continue; 431 } 432 if (cnt > n) 433 cnt = n; 434 page_offset = offset & PAGE_MASK; 435 cnt = min(cnt, PAGE_SIZE - page_offset); 436 m = ma[offset >> PAGE_SHIFT]; 437 sf = sf_buf_alloc(m, SFB_CPUPRIVATE); 438 cp = (char *)sf_buf_kva(sf) + page_offset; 439 switch (uio->uio_segflg) { 440 case UIO_USERSPACE: 441 /* 442 * note: removed uioyield (it was the wrong place to 443 * put it). 444 */ 445 if (uio->uio_rw == UIO_READ) 446 error = copyout(cp, iov->iov_base, cnt); 447 else 448 error = copyin(iov->iov_base, cp, cnt); 449 if (error) { 450 sf_buf_free(sf); 451 goto out; 452 } 453 break; 454 case UIO_SYSSPACE: 455 if (uio->uio_rw == UIO_READ) 456 bcopy(cp, iov->iov_base, cnt); 457 else 458 bcopy(iov->iov_base, cp, cnt); 459 break; 460 case UIO_NOCOPY: 461 break; 462 } 463 sf_buf_free(sf); 464 iov->iov_base = (char *)iov->iov_base + cnt; 465 iov->iov_len -= cnt; 466 uio->uio_resid -= cnt; 467 uio->uio_offset += cnt; 468 offset += cnt; 469 n -= cnt; 470 } 471 out: 472 if (save == 0) { 473 crit_enter(); 474 td->td_flags &= ~TDF_DEADLKTREAT; 475 crit_exit(); 476 } 477 return (error); 478 } 479 480