/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_subr.c	8.3 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_subr.c,v 1.31.2.2 2002/04/21 08:09:37 bde Exp $
 * $DragonFly: src/sys/kern/kern_subr.c,v 1.27 2007/01/29 20:44:02 tgen Exp $
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <sys/thread2.h>
#include <machine/limits.h>

#include <cpu/lwbuf.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>

/*
 * Export the compile-time constant UIO_MAXIOV as a read-only (CTLFLAG_RD)
 * integer sysctl named iov_max under the _kern node.  No backing variable
 * is supplied (NULL); the value is the constant itself.
 */
SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
	"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");

/*
 * UIO_READ:	copy the kernelspace cp to the user or kernelspace UIO
 * UIO_WRITE:	copy the user or kernelspace UIO to the kernelspace cp
 *
 * For userspace UIO's, uio_td must be the current thread.
 *
 * The syscall interface is responsible for limiting the length to
 * ssize_t for things like read() or write() which return the bytes
 * read or written as ssize_t.  These functions work with unsigned
 * lengths.
77 */ 78 int 79 uiomove(caddr_t cp, size_t n, struct uio *uio) 80 { 81 struct iovec *iov; 82 size_t cnt; 83 int error = 0; 84 int save = 0; 85 86 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 87 ("uiomove: mode")); 88 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 89 ("uiomove proc")); 90 91 if (curproc) { 92 save = curproc->p_flag & P_DEADLKTREAT; 93 curproc->p_flag |= P_DEADLKTREAT; 94 } 95 96 while (n > 0 && uio->uio_resid) { 97 iov = uio->uio_iov; 98 cnt = iov->iov_len; 99 if (cnt == 0) { 100 uio->uio_iov++; 101 uio->uio_iovcnt--; 102 continue; 103 } 104 if (cnt > n) 105 cnt = n; 106 107 switch (uio->uio_segflg) { 108 109 case UIO_USERSPACE: 110 lwkt_user_yield(); 111 if (uio->uio_rw == UIO_READ) 112 error = copyout(cp, iov->iov_base, cnt); 113 else 114 error = copyin(iov->iov_base, cp, cnt); 115 if (error) 116 break; 117 break; 118 119 case UIO_SYSSPACE: 120 if (uio->uio_rw == UIO_READ) 121 bcopy((caddr_t)cp, iov->iov_base, cnt); 122 else 123 bcopy(iov->iov_base, (caddr_t)cp, cnt); 124 break; 125 case UIO_NOCOPY: 126 break; 127 } 128 iov->iov_base = (char *)iov->iov_base + cnt; 129 iov->iov_len -= cnt; 130 uio->uio_resid -= cnt; 131 uio->uio_offset += cnt; 132 cp += cnt; 133 n -= cnt; 134 } 135 if (curproc) 136 curproc->p_flag = (curproc->p_flag & ~P_DEADLKTREAT) | save; 137 return (error); 138 } 139 140 /* 141 * Like uiomove() but copies zero-fill. Only allowed for UIO_READ, 142 * for obvious reasons. 
143 */ 144 int 145 uiomovez(size_t n, struct uio *uio) 146 { 147 struct iovec *iov; 148 size_t cnt; 149 int error = 0; 150 151 KASSERT(uio->uio_rw == UIO_READ, ("uiomovez: mode")); 152 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 153 ("uiomove proc")); 154 155 while (n > 0 && uio->uio_resid) { 156 iov = uio->uio_iov; 157 cnt = iov->iov_len; 158 if (cnt == 0) { 159 uio->uio_iov++; 160 uio->uio_iovcnt--; 161 continue; 162 } 163 if (cnt > n) 164 cnt = n; 165 166 switch (uio->uio_segflg) { 167 case UIO_USERSPACE: 168 error = copyout(ZeroPage, iov->iov_base, cnt); 169 if (error) 170 break; 171 break; 172 case UIO_SYSSPACE: 173 bzero(iov->iov_base, cnt); 174 break; 175 case UIO_NOCOPY: 176 break; 177 } 178 iov->iov_base = (char *)iov->iov_base + cnt; 179 iov->iov_len -= cnt; 180 uio->uio_resid -= cnt; 181 uio->uio_offset += cnt; 182 n -= cnt; 183 } 184 return (error); 185 } 186 187 /* 188 * Wrapper for uiomove() that validates the arguments against a known-good 189 * kernel buffer. 190 */ 191 int 192 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio) 193 { 194 size_t offset; 195 196 offset = (size_t)uio->uio_offset; 197 if ((off_t)offset != uio->uio_offset) 198 return (EINVAL); 199 if (buflen == 0 || offset >= buflen) 200 return (0); 201 return (uiomove((char *)buf + offset, buflen - offset, uio)); 202 } 203 204 /* 205 * Give next character to user as result of read. 
206 */ 207 int 208 ureadc(int c, struct uio *uio) 209 { 210 struct iovec *iov; 211 char *iov_base; 212 213 again: 214 if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) 215 panic("ureadc"); 216 iov = uio->uio_iov; 217 if (iov->iov_len == 0) { 218 uio->uio_iovcnt--; 219 uio->uio_iov++; 220 goto again; 221 } 222 switch (uio->uio_segflg) { 223 224 case UIO_USERSPACE: 225 if (subyte(iov->iov_base, c) < 0) 226 return (EFAULT); 227 break; 228 229 case UIO_SYSSPACE: 230 iov_base = iov->iov_base; 231 *iov_base = c; 232 iov->iov_base = iov_base; 233 break; 234 235 case UIO_NOCOPY: 236 break; 237 } 238 iov->iov_base = (char *)iov->iov_base + 1; 239 iov->iov_len--; 240 uio->uio_resid--; 241 uio->uio_offset++; 242 return (0); 243 } 244 245 /* 246 * General routine to allocate a hash table. Make the hash table size a 247 * power of 2 greater or equal to the number of elements requested, and 248 * store the masking value in *hashmask. 249 */ 250 void * 251 hashinit(int elements, struct malloc_type *type, u_long *hashmask) 252 { 253 long hashsize; 254 LIST_HEAD(generic, generic) *hashtbl; 255 int i; 256 257 if (elements <= 0) 258 panic("hashinit: bad elements"); 259 for (hashsize = 2; hashsize < elements; hashsize <<= 1) 260 continue; 261 hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 262 for (i = 0; i < hashsize; i++) 263 LIST_INIT(&hashtbl[i]); 264 *hashmask = hashsize - 1; 265 return (hashtbl); 266 } 267 268 /* 269 * This is a newer version which allocates a hash table of structures. 270 * 271 * The returned array will be zero'd. The caller is responsible for 272 * initializing the structures. 
273 */ 274 void * 275 hashinit_ext(int elements, size_t size, struct malloc_type *type, 276 u_long *hashmask) 277 { 278 long hashsize; 279 void *hashtbl; 280 281 if (elements <= 0) 282 panic("hashinit: bad elements"); 283 for (hashsize = 2; hashsize < elements; hashsize <<= 1) 284 continue; 285 hashtbl = kmalloc((size_t)hashsize * size, type, M_WAITOK | M_ZERO); 286 *hashmask = hashsize - 1; 287 return (hashtbl); 288 } 289 290 static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039, 291 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 292 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; 293 #define NPRIMES (sizeof(primes) / sizeof(primes[0])) 294 295 /* 296 * General routine to allocate a prime number sized hash table. 297 */ 298 void * 299 phashinit(int elements, struct malloc_type *type, u_long *nentries) 300 { 301 long hashsize; 302 LIST_HEAD(generic, generic) *hashtbl; 303 int i; 304 305 if (elements <= 0) 306 panic("phashinit: bad elements"); 307 for (i = 1, hashsize = primes[1]; hashsize <= elements;) { 308 i++; 309 if (i == NPRIMES) 310 break; 311 hashsize = primes[i]; 312 } 313 hashsize = primes[i - 1]; 314 hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 315 for (i = 0; i < hashsize; i++) 316 LIST_INIT(&hashtbl[i]); 317 *nentries = hashsize; 318 return (hashtbl); 319 } 320 321 /* 322 * This is a newer version which allocates a hash table of structures 323 * in a prime-number size. 324 * 325 * The returned array will be zero'd. The caller is responsible for 326 * initializing the structures. 
327 */ 328 void * 329 phashinit_ext(int elements, size_t size, struct malloc_type *type, 330 u_long *nentries) 331 { 332 long hashsize; 333 void *hashtbl; 334 int i; 335 336 if (elements <= 0) 337 panic("phashinit: bad elements"); 338 for (i = 1, hashsize = primes[1]; hashsize <= elements;) { 339 i++; 340 if (i == NPRIMES) 341 break; 342 hashsize = primes[i]; 343 } 344 hashsize = primes[i - 1]; 345 hashtbl = kmalloc((size_t)hashsize * size, type, M_WAITOK | M_ZERO); 346 *nentries = hashsize; 347 return (hashtbl); 348 } 349 350 /* 351 * Copyin an iovec. If the iovec array fits, use the preallocated small 352 * iovec structure. If it is too big, dynamically allocate an iovec array 353 * of sufficient size. 354 * 355 * MPSAFE 356 */ 357 int 358 iovec_copyin(struct iovec *uiov, struct iovec **kiov, struct iovec *siov, 359 size_t iov_cnt, size_t *iov_len) 360 { 361 struct iovec *iovp; 362 int error, i; 363 size_t len; 364 365 if (iov_cnt > UIO_MAXIOV) 366 return EMSGSIZE; 367 if (iov_cnt > UIO_SMALLIOV) { 368 MALLOC(*kiov, struct iovec *, sizeof(struct iovec) * iov_cnt, 369 M_IOV, M_WAITOK); 370 } else { 371 *kiov = siov; 372 } 373 error = copyin(uiov, *kiov, iov_cnt * sizeof(struct iovec)); 374 if (error == 0) { 375 *iov_len = 0; 376 for (i = 0, iovp = *kiov; i < iov_cnt; i++, iovp++) { 377 /* 378 * Check for both *iov_len overflows and out of 379 * range iovp->iov_len's. We limit to the 380 * capabilities of signed integers. 381 * 382 * GCC4 - overflow check opt requires assign/test. 383 */ 384 len = *iov_len + iovp->iov_len; 385 if (len < *iov_len) 386 error = EINVAL; 387 *iov_len = len; 388 } 389 } 390 391 /* 392 * From userland disallow iovec's which exceed the sized size 393 * limit as the system calls return ssize_t. 394 * 395 * NOTE: Internal kernel interfaces can handle the unsigned 396 * limit. 
397 */ 398 if (error == 0 && (ssize_t)*iov_len < 0) 399 error = EINVAL; 400 401 if (error) 402 iovec_free(kiov, siov); 403 return (error); 404 } 405 406 407 /* 408 * Copyright (c) 2004 Alan L. Cox <alc@cs.rice.edu> 409 * Copyright (c) 1982, 1986, 1991, 1993 410 * The Regents of the University of California. All rights reserved. 411 * (c) UNIX System Laboratories, Inc. 412 * All or some portions of this file are derived from material licensed 413 * to the University of California by American Telephone and Telegraph 414 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 415 * the permission of UNIX System Laboratories, Inc. 416 * 417 * Redistribution and use in source and binary forms, with or without 418 * modification, are permitted provided that the following conditions 419 * are met: 420 * 1. Redistributions of source code must retain the above copyright 421 * notice, this list of conditions and the following disclaimer. 422 * 2. Redistributions in binary form must reproduce the above copyright 423 * notice, this list of conditions and the following disclaimer in the 424 * documentation and/or other materials provided with the distribution. 425 * 4. Neither the name of the University nor the names of its contributors 426 * may be used to endorse or promote products derived from this software 427 * without specific prior written permission. 428 * 429 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 430 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 431 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 432 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 433 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 434 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 435 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 436 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 437 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 438 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 439 * SUCH DAMAGE. 440 * 441 * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 442 * $FreeBSD: src/sys/i386/i386/uio_machdep.c,v 1.1 2004/03/21 20:28:36 alc Exp $ 443 */ 444 445 /* 446 * Implement uiomove(9) from physical memory using lwbuf's to reduce 447 * the creation and destruction of ephemeral mappings. 448 */ 449 int 450 uiomove_fromphys(vm_page_t *ma, vm_offset_t offset, size_t n, struct uio *uio) 451 { 452 struct lwbuf *lwb; 453 struct thread *td = curthread; 454 struct iovec *iov; 455 void *cp; 456 vm_offset_t page_offset; 457 vm_page_t m; 458 size_t cnt; 459 int error = 0; 460 int save = 0; 461 462 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 463 ("uiomove_fromphys: mode")); 464 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 465 ("uiomove_fromphys proc")); 466 467 crit_enter(); 468 save = td->td_flags & TDF_DEADLKTREAT; 469 td->td_flags |= TDF_DEADLKTREAT; 470 crit_exit(); 471 472 while (n > 0 && uio->uio_resid) { 473 iov = uio->uio_iov; 474 cnt = iov->iov_len; 475 if (cnt == 0) { 476 uio->uio_iov++; 477 uio->uio_iovcnt--; 478 continue; 479 } 480 if (cnt > n) 481 cnt = n; 482 page_offset = offset & PAGE_MASK; 483 cnt = min(cnt, PAGE_SIZE - page_offset); 484 m = ma[offset >> PAGE_SHIFT]; 485 lwb = lwbuf_alloc(m); 486 cp = (char *)lwbuf_kva(lwb) + page_offset; 487 switch (uio->uio_segflg) { 488 case UIO_USERSPACE: 489 /* 490 * note: removed uioyield (it was the wrong place to 491 * put it). 
492 */ 493 if (uio->uio_rw == UIO_READ) 494 error = copyout(cp, iov->iov_base, cnt); 495 else 496 error = copyin(iov->iov_base, cp, cnt); 497 if (error) { 498 lwbuf_free(lwb); 499 goto out; 500 } 501 break; 502 case UIO_SYSSPACE: 503 if (uio->uio_rw == UIO_READ) 504 bcopy(cp, iov->iov_base, cnt); 505 else 506 bcopy(iov->iov_base, cp, cnt); 507 break; 508 case UIO_NOCOPY: 509 break; 510 } 511 lwbuf_free(lwb); 512 iov->iov_base = (char *)iov->iov_base + cnt; 513 iov->iov_len -= cnt; 514 uio->uio_resid -= cnt; 515 uio->uio_offset += cnt; 516 offset += cnt; 517 n -= cnt; 518 } 519 out: 520 if (save == 0) { 521 crit_enter(); 522 td->td_flags &= ~TDF_DEADLKTREAT; 523 crit_exit(); 524 } 525 return (error); 526 } 527 528