/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_subr.c	8.3 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_subr.c,v 1.31.2.2 2002/04/21 08:09:37 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <sys/thread2.h>
#include <machine/limits.h>

#include <cpu/lwbuf.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>

SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
	"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
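
/*
 * Example (hypothetical sketch, not part of this file's API): uiomove()
 * below is the central primitive for moving data between a kernel buffer
 * and the user- or kernel-space regions described by a struct uio.  A
 * character-device read handler might drain a private kernel buffer like
 * this; "sc" and its fields are illustrative only:
 *
 *	size_t cnt = szmin(uio->uio_resid, sc->sc_len - sc->sc_off);
 *	int error;
 *
 *	error = uiomove(sc->sc_buf + sc->sc_off, cnt, uio);
 *	if (error == 0)
 *		sc->sc_off += cnt;
 */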

/*
 * UIO_READ:	copy the kernelspace cp to the user or kernelspace UIO
 * UIO_WRITE:	copy the user or kernelspace UIO to the kernelspace cp
 *
 * For userspace UIOs, uio_td must be the current thread.
 *
 * The syscall interface is responsible for limiting the length to
 * ssize_t for things like read() or write() which return the bytes
 * read or written as ssize_t.  These functions work with unsigned
 * lengths.
 */
int
uiomove(caddr_t cp, size_t n, struct uio *uio)
{
	thread_t td = curthread;
	struct iovec *iov;
	size_t cnt;
	size_t tot;
	int error = 0;
	int save = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomove: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td,
	    ("uiomove proc"));

	crit_enter();
	save = td->td_flags & TDF_DEADLKTREAT;
	td->td_flags |= TDF_DEADLKTREAT;
	crit_exit();

	tot = 0;

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;
		tot += cnt;

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/* yield periodically during large copies */
			if (tot > 1024*1024)
				lwkt_user_yield();
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			break;
		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}

		/*
		 * Stop on a fault before advancing the uio, so the
		 * caller sees an accurate uio_resid.
		 */
		if (error)
			break;

		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp += cnt;
		n -= cnt;
	}
	crit_enter();
	td->td_flags = (td->td_flags & ~TDF_DEADLKTREAT) | save;
	crit_exit();
	return (error);
}

/*
 * This is the same as uiomove() except (cp, n) is within the bounds of
 * the passed, locked buffer.  Under certain circumstances a VM fault
 * occurring with a locked buffer held can result in a deadlock or an
 * attempt to recursively lock the buffer.
 *
 * This procedure deals with these cases.
 *
 * If the buffer represents a regular file, is B_CACHE, but the last VM page
 * is not fully valid, we fix up the last VM page.  This should handle the
 * recursive lock issue.
 *
 * Deadlocks are another issue.  We are holding the vp and the bp locked
 * and could deadlock against a different vp and/or bp if another thread is
 * trying to access us while we are accessing it.  The only solution here is
 * to release the bp and vnode lock and do the uio to/from a system buffer,
 * then regain the locks and copyback (if applicable).  XXX TODO.
 */
int
uiomovebp(struct buf *bp, caddr_t cp, size_t n, struct uio *uio)
{
	int count;
	vm_page_t m;

	if (bp->b_vp && bp->b_vp->v_type == VREG &&
	    (bp->b_flags & B_CACHE) &&
	    (count = bp->b_xio.xio_npages) != 0 &&
	    (m = bp->b_xio.xio_pages[count-1])->valid != VM_PAGE_BITS_ALL) {
		vm_page_zero_invalid(m, TRUE);
	}
	return (uiomove(cp, n, uio));
}
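
/*
 * Example (hypothetical sketch): uiomovebp() is intended for paths that
 * copy while holding a locked buffer, as a filesystem read might.  The
 * names below ("vp", "loffset", "boff", "len") are illustrative only:
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = bread(vp, loffset, blksize, &bp);
 *	if (error == 0)
 *		error = uiomovebp(bp, bp->b_data + boff, (size_t)len, uio);
 *	brelse(bp);
 */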

/*
 * Like uiomove() but copies zero-fill.  Only allowed for UIO_READ,
 * for obvious reasons.
 */
int
uiomovez(size_t n, struct uio *uio)
{
	struct iovec *iov;
	size_t cnt;
	int error = 0;

	KASSERT(uio->uio_rw == UIO_READ, ("uiomovez: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomovez proc"));

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;
		/*
		 * ZeroPage is a single zeroed page, so bound each copy
		 * to at most one page's worth per iteration.
		 */
		if (cnt > PAGE_SIZE)
			cnt = PAGE_SIZE;

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			error = copyout(ZeroPage, iov->iov_base, cnt);
			break;
		case UIO_SYSSPACE:
			bzero(iov->iov_base, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}

		/* stop on a fault before advancing the uio */
		if (error)
			break;

		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		n -= cnt;
	}
	return (error);
}

/*
 * Wrapper for uiomove() that validates the arguments against a known-good
 * kernel buffer.  This function automatically indexes the buffer by
 * uio_offset and handles all range checking.
 */
int
uiomove_frombuf(void *buf, size_t buflen, struct uio *uio)
{
	size_t offset;

	offset = (size_t)uio->uio_offset;
	if ((off_t)offset != uio->uio_offset)
		return (EINVAL);
	if (buflen == 0 || offset >= buflen)
		return (0);
	return (uiomove((char *)buf + offset, buflen - offset, uio));
}

/*
 * Give next character to user as result of read.
 */
int
ureadc(int c, struct uio *uio)
{
	struct iovec *iov;
	char *iov_base;

again:
	if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
		panic("ureadc");
	iov = uio->uio_iov;
	if (iov->iov_len == 0) {
		uio->uio_iovcnt--;
		uio->uio_iov++;
		goto again;
	}

	switch (uio->uio_segflg) {
	case UIO_USERSPACE:
		if (subyte(iov->iov_base, c) < 0)
			return (EFAULT);
		break;
	case UIO_SYSSPACE:
		iov_base = iov->iov_base;
		*iov_base = c;
		iov->iov_base = iov_base;
		break;
	case UIO_NOCOPY:
		break;
	}

	iov->iov_base = (char *)iov->iov_base + 1;
	iov->iov_len--;
	uio->uio_resid--;
	uio->uio_offset++;
	return (0);
}

/*
 * General routine to allocate a hash table.  Make the hash table size a
 * power of 2 greater than or equal to the number of elements requested,
 * and store the masking value in *hashmask.
 */
void *
hashinit(int elements, struct malloc_type *type, u_long *hashmask)
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("hashinit: bad elements");
	for (hashsize = 2; hashsize < elements; hashsize <<= 1)
		continue;
	hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*hashmask = hashsize - 1;
	return (hashtbl);
}

void
hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask)
{
	LIST_HEAD(generic, generic) *hashtbl, *hp;

	hashtbl = vhashtbl;
	for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++)
		KASSERT(LIST_EMPTY(hp), ("%s: hash not empty", __func__));
	kfree(hashtbl, type);
}
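
/*
 * Example (hypothetical sketch): allocating and indexing a table of list
 * heads.  The structure names and the M_TEMP malloc type are illustrative
 * only:
 *
 *	LIST_HEAD(obj_head, obj) *tbl;
 *	u_long mask;
 *
 *	tbl = hashinit(desired_elements, M_TEMP, &mask);
 *	bucket = &tbl[key & mask];	(mask is hashsize - 1)
 *	...
 *	hashdestroy(tbl, M_TEMP, mask);
 */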

/*
 * This is a newer version which allocates a hash table of structures.
 *
 * The returned array will be zero'd.  The caller is responsible for
 * initializing the structures.
 */
void *
hashinit_ext(int elements, size_t size, struct malloc_type *type,
	     u_long *hashmask)
{
	long hashsize;
	void *hashtbl;

	if (elements <= 0)
		panic("hashinit_ext: bad elements");
	for (hashsize = 2; hashsize < elements; hashsize <<= 1)
		continue;
	hashtbl = kmalloc((size_t)hashsize * size, type, M_WAITOK | M_ZERO);
	*hashmask = hashsize - 1;
	return (hashtbl);
}

static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039,
			2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653,
			7159, 7673, 8191, 12281, 16381, 24571, 32749 };
#define NPRIMES NELEM(primes)

/*
 * General routine to allocate a prime number sized hash table.
 */
void *
phashinit(int elements, struct malloc_type *type, u_long *nentries)
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("phashinit: bad elements");
	for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
		i++;
		if (i == NPRIMES)
			break;
		hashsize = primes[i];
	}
	hashsize = primes[i - 1];
	hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*nentries = hashsize;
	return (hashtbl);
}

/*
 * This is a newer version which allocates a hash table of structures
 * in a prime-number size.
 *
 * The returned array will be zero'd.  The caller is responsible for
 * initializing the structures.
 */
void *
phashinit_ext(int elements, size_t size, struct malloc_type *type,
	      u_long *nentries)
{
	long hashsize;
	void *hashtbl;
	int i;

	if (elements <= 0)
		panic("phashinit_ext: bad elements");
	for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
		i++;
		if (i == NPRIMES)
			break;
		hashsize = primes[i];
	}
	hashsize = primes[i - 1];
	hashtbl = kmalloc((size_t)hashsize * size, type, M_WAITOK | M_ZERO);
	*nentries = hashsize;
	return (hashtbl);
}
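
/*
 * Example (hypothetical sketch): phashinit() sizes the table to a prime,
 * so buckets are selected with a modulus rather than a mask.  The names
 * below are illustrative only:
 *
 *	LIST_HEAD(obj_head, obj) *tbl;
 *	u_long nentries;
 *
 *	tbl = phashinit(desired_elements, M_TEMP, &nentries);
 *	bucket = &tbl[key % nentries];
 */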

/*
 * Copyin an iovec.  If the iovec array fits, use the preallocated small
 * iovec structure.  If it is too big, dynamically allocate an iovec array
 * of sufficient size.
 *
 * MPSAFE
 */
int
iovec_copyin(struct iovec *uiov, struct iovec **kiov, struct iovec *siov,
	     size_t iov_cnt, size_t *iov_len)
{
	struct iovec *iovp;
	int error, i;
	size_t len;

	if (iov_cnt > UIO_MAXIOV)
		return (EMSGSIZE);
	if (iov_cnt > UIO_SMALLIOV) {
		*kiov = kmalloc(sizeof(struct iovec) * iov_cnt, M_IOV,
				M_WAITOK);
	} else {
		*kiov = siov;
	}
	error = copyin(uiov, *kiov, iov_cnt * sizeof(struct iovec));
	if (error == 0) {
		*iov_len = 0;
		for (i = 0, iovp = *kiov; i < iov_cnt; i++, iovp++) {
			/*
			 * Check for both *iov_len overflows and out of
			 * range iovp->iov_len's.  We limit to the
			 * capabilities of signed integers.
			 *
			 * GCC4 - overflow check opt requires assign/test.
			 */
			len = *iov_len + iovp->iov_len;
			if (len < *iov_len)
				error = EINVAL;
			*iov_len = len;
		}
	}

	/*
	 * For copies from userland, disallow iovecs whose total length
	 * exceeds the signed size limit (ssize_t), since the system
	 * calls return the byte count as ssize_t.
	 *
	 * NOTE: Internal kernel interfaces can handle the unsigned
	 *	 limit.
	 */
	if (error == 0 && (ssize_t)*iov_len < 0)
		error = EINVAL;

	if (error)
		iovec_free(kiov, siov);
	return (error);
}
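
/*
 * Example (hypothetical sketch): a readv()-style system call brackets the
 * actual I/O with iovec_copyin()/iovec_free().  The "uap" fields and the
 * "auio" setup shown here are illustrative:
 *
 *	struct iovec aiov[UIO_SMALLIOV], *iov;
 *	struct uio auio;
 *	size_t len;
 *	int error;
 *
 *	error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt, &len);
 *	if (error)
 *		return (error);
 *	auio.uio_iov = iov;
 *	auio.uio_iovcnt = uap->iovcnt;
 *	auio.uio_resid = len;
 *	...
 *	iovec_free(&iov, aiov);
 */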

/*
 * Copyright (c) 2004 Alan L. Cox <alc@cs.rice.edu>
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_subr.c	8.3 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/i386/i386/uio_machdep.c,v 1.1 2004/03/21 20:28:36 alc Exp $
 */

/*
 * Implement uiomove(9) from physical memory using lwbuf's to reduce
 * the creation and destruction of ephemeral mappings.
 */
int
uiomove_fromphys(vm_page_t *ma, vm_offset_t offset, size_t n, struct uio *uio)
{
	struct lwbuf lwb_cache;
	struct lwbuf *lwb;
	struct thread *td = curthread;
	struct iovec *iov;
	void *cp;
	vm_offset_t page_offset;
	vm_page_t m;
	size_t cnt;
	int error = 0;
	int save = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomove_fromphys: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomove_fromphys proc"));

	crit_enter();
	save = td->td_flags & TDF_DEADLKTREAT;
	td->td_flags |= TDF_DEADLKTREAT;
	crit_exit();

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;
		page_offset = offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - page_offset);
		m = ma[offset >> PAGE_SHIFT];
		lwb = lwbuf_alloc(m, &lwb_cache);
		cp = (char *)lwbuf_kva(lwb) + page_offset;

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/*
			 * note: removed uioyield (it was the wrong place to
			 * put it).
			 */
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			if (error) {
				lwbuf_free(lwb);
				goto out;
			}
			break;
		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		lwbuf_free(lwb);
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		offset += cnt;
		n -= cnt;
	}
out:
	if (save == 0) {
		crit_enter();
		td->td_flags &= ~TDF_DEADLKTREAT;
		crit_exit();
	}
	return (error);
}
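
/*
 * Example (hypothetical sketch): uiomove_fromphys() copies directly from an
 * array of vm_page_t without requiring a persistent kernel mapping; "pages",
 * "poff", and "nbytes" are illustrative:
 *
 *	vm_page_t pages[16];
 *	...
 *	error = uiomove_fromphys(pages, poff, nbytes, uio);
 *
 * Each iteration maps at most one page via lwbuf_alloc(), so the caller
 * need only keep the pages stable (held or wired) for the duration of
 * the call.
 */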