1 /*- 2 * Copyright (c) 2008 Yahoo!, Inc. 3 * All rights reserved. 4 * Written by: John Baldwin <jhb@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Neither the name of the author nor the names of any co-contributors 15 * may be used to endorse or promote products derived from this software 16 * without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $FreeBSD: src/sys/kern/subr_sglist.c,v 1.3 2009/08/21 02:59:07 jhb Exp $ 31 */ 32 33 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/proc.h> 39 #include <sys/sglist.h> 40 #include <sys/uio.h> 41 42 #include <vm/vm.h> 43 #include <vm/pmap.h> 44 #include <vm/vm_map.h> 45 46 #include <sys/ktr.h> 47 48 static MALLOC_DEFINE(M_SGLIST, "sglist", "scatter/gather lists"); 49 50 /* 51 * Convenience macros to save the state of an sglist so it can be restored 52 * if an append attempt fails. Since sglist's only grow we only need to 53 * save the current count of segments and the length of the ending segment. 54 * Earlier segments will not be changed by an append, and the only change 55 * that can occur to the ending segment is that it can be extended. 56 */ 57 struct sgsave { 58 u_short sg_nseg; 59 size_t ss_len; 60 }; 61 62 #define SGLIST_SAVE(sg, sgsave) do { \ 63 (sgsave).sg_nseg = (sg)->sg_nseg; \ 64 if ((sgsave).sg_nseg > 0) \ 65 (sgsave).ss_len = (sg)->sg_segs[(sgsave).sg_nseg - 1].ss_len; \ 66 else \ 67 (sgsave).ss_len = 0; \ 68 } while (0) 69 70 #define SGLIST_RESTORE(sg, sgsave) do { \ 71 (sg)->sg_nseg = (sgsave).sg_nseg; \ 72 if ((sgsave).sg_nseg > 0) \ 73 (sg)->sg_segs[(sgsave).sg_nseg - 1].ss_len = (sgsave).ss_len; \ 74 } while (0) 75 76 /* 77 * Append a single (paddr, len) to a sglist. sg is the list and ss is 78 * the current segment in the list. If we run out of segments then 79 * EFBIG will be returned. 80 */ 81 static __inline int 82 _sglist_append_range(struct sglist *sg, struct sglist_seg **ssp, 83 vm_paddr_t paddr, size_t len) 84 { 85 struct sglist_seg *ss; 86 87 ss = *ssp; 88 if (ss->ss_paddr + ss->ss_len == paddr) 89 ss->ss_len += len; 90 else { 91 if (sg->sg_nseg == sg->sg_maxseg) 92 return (EFBIG); 93 ss++; 94 ss->ss_paddr = paddr; 95 ss->ss_len = len; 96 sg->sg_nseg++; 97 *ssp = ss; 98 } 99 return (0); 100 } 101 102 /* 103 * Worker routine to append a virtual address range (either kernel or 104 * user) to a scatter/gather list. 105 */ 106 static __inline int 107 _sglist_append_buf(struct sglist *sg, void *buf, size_t len, pmap_t pmap, 108 size_t *donep) 109 { 110 struct sglist_seg *ss; 111 vm_offset_t vaddr, offset; 112 vm_paddr_t paddr; 113 void *handle; 114 size_t seglen; 115 int error; 116 117 if (donep) 118 *donep = 0; 119 if (len == 0) 120 return (0); 121 122 /* Do the first page. It may have an offset. */ 123 vaddr = (vm_offset_t)buf; 124 offset = vaddr & PAGE_MASK; 125 if (pmap != NULL) { 126 paddr = pmap_extract(pmap, vaddr, &handle); 127 } else { 128 paddr = pmap_kextract(vaddr); 129 handle = NULL; 130 } 131 seglen = MIN(len, PAGE_SIZE - offset); 132 if (sg->sg_nseg == 0) { 133 ss = sg->sg_segs; 134 ss->ss_paddr = paddr; 135 ss->ss_len = seglen; 136 sg->sg_nseg = 1; 137 } else { 138 ss = &sg->sg_segs[sg->sg_nseg - 1]; 139 error = _sglist_append_range(sg, &ss, paddr, seglen); 140 if (error) { 141 pmap_extract_done(handle); 142 return (error); 143 } 144 } 145 pmap_extract_done(handle); 146 vaddr += seglen; 147 len -= seglen; 148 if (donep) 149 *donep += seglen; 150 151 while (len > 0) { 152 seglen = MIN(len, PAGE_SIZE); 153 if (pmap != NULL) { 154 paddr = pmap_extract(pmap, vaddr, &handle); 155 error = _sglist_append_range(sg, &ss, paddr, seglen); 156 pmap_extract_done(handle); 157 } else { 158 paddr = pmap_kextract(vaddr); 159 error = _sglist_append_range(sg, &ss, paddr, seglen); 160 } 161 if (error) 162 return (error); 163 vaddr += seglen; 164 len -= seglen; 165 if (donep) 166 *donep += seglen; 167 } 168 169 return (0); 170 } 171 172 /* 173 * Determine the number of scatter/gather list elements needed to 174 * describe a kernel virtual address range. 175 */ 176 int 177 sglist_count(void *buf, size_t len) 178 { 179 vm_offset_t vaddr, vendaddr; 180 vm_paddr_t lastaddr, paddr; 181 int nsegs; 182 183 if (len == 0) 184 return (0); 185 186 vaddr = trunc_page((vm_offset_t)buf); 187 vendaddr = (vm_offset_t)buf + len; 188 nsegs = 1; 189 lastaddr = pmap_kextract(vaddr); 190 vaddr += PAGE_SIZE; 191 while (vaddr < vendaddr) { 192 paddr = pmap_kextract(vaddr); 193 if (lastaddr + PAGE_SIZE != paddr) 194 nsegs++; 195 lastaddr = paddr; 196 vaddr += PAGE_SIZE; 197 } 198 return (nsegs); 199 } 200 201 /* 202 * Allocate a scatter/gather list along with 'nsegs' segments. The 203 * 'mflags' parameters are the same as passed to kmalloc(9). The caller 204 * should use sglist_free() to free this list. 205 */ 206 struct sglist * 207 sglist_alloc(int nsegs, int mflags) 208 { 209 struct sglist *sg; 210 211 sg = kmalloc(sizeof(struct sglist) + nsegs * sizeof(struct sglist_seg), 212 M_SGLIST, mflags); 213 if (sg == NULL) 214 return (NULL); 215 sglist_init(sg, nsegs, (struct sglist_seg *)(sg + 1)); 216 return (sg); 217 } 218 219 /* 220 * Free a scatter/gather list allocated via sglist_allc(). 221 */ 222 void 223 sglist_free(struct sglist *sg) 224 { 225 226 if (refcount_release(&sg->sg_refs)) 227 kfree(sg, M_SGLIST); 228 } 229 230 /* 231 * Append the segments to describe a single kernel virtual address 232 * range to a scatter/gather list. If there are insufficient 233 * segments, then this fails with EFBIG. 234 */ 235 int 236 sglist_append(struct sglist *sg, void *buf, size_t len) 237 { 238 struct sgsave save; 239 int error; 240 241 if (sg->sg_maxseg == 0) 242 return (EINVAL); 243 SGLIST_SAVE(sg, save); 244 error = _sglist_append_buf(sg, buf, len, NULL, NULL); 245 if (error) 246 SGLIST_RESTORE(sg, save); 247 return (error); 248 } 249 250 /* 251 * Append a single physical address range to a scatter/gather list. 252 * If there are insufficient segments, then this fails with EFBIG. 253 */ 254 int 255 sglist_append_phys(struct sglist *sg, vm_paddr_t paddr, size_t len) 256 { 257 struct sglist_seg *ss; 258 struct sgsave save; 259 int error; 260 261 if (sg->sg_maxseg == 0) 262 return (EINVAL); 263 if (len == 0) 264 return (0); 265 266 if (sg->sg_nseg == 0) { 267 sg->sg_segs[0].ss_paddr = paddr; 268 sg->sg_segs[0].ss_len = len; 269 sg->sg_nseg = 1; 270 return (0); 271 } 272 ss = &sg->sg_segs[sg->sg_nseg - 1]; 273 SGLIST_SAVE(sg, save); 274 error = _sglist_append_range(sg, &ss, paddr, len); 275 if (error) 276 SGLIST_RESTORE(sg, save); 277 return (error); 278 } 279 280 /* 281 * Append the segments that describe a single mbuf chain to a 282 * scatter/gather list. If there are insufficient segments, then this 283 * fails with EFBIG. 284 */ 285 int 286 sglist_append_mbuf(struct sglist *sg, struct mbuf *m0) 287 { 288 struct sgsave save; 289 struct mbuf *m; 290 int error; 291 292 if (sg->sg_maxseg == 0) 293 return (EINVAL); 294 295 error = 0; 296 SGLIST_SAVE(sg, save); 297 for (m = m0; m != NULL; m = m->m_next) { 298 if (m->m_len > 0) { 299 error = sglist_append(sg, m->m_data, m->m_len); 300 if (error) { 301 SGLIST_RESTORE(sg, save); 302 return (error); 303 } 304 } 305 } 306 return (0); 307 } 308 309 /* 310 * Append the segments that describe a single user address range to a 311 * scatter/gather list. If there are insufficient segments, then this 312 * fails with EFBIG. 313 */ 314 int 315 sglist_append_user(struct sglist *sg, void *buf, size_t len, struct thread *td) 316 { 317 struct sgsave save; 318 int error; 319 320 if (sg->sg_maxseg == 0) 321 return (EINVAL); 322 SGLIST_SAVE(sg, save); 323 error = _sglist_append_buf(sg, buf, len, 324 vmspace_pmap(td->td_proc->p_vmspace), NULL); 325 if (error) 326 SGLIST_RESTORE(sg, save); 327 return (error); 328 } 329 330 /* 331 * Append the segments that describe a single uio to a scatter/gather 332 * list. If there are insufficient segments, then this fails with 333 * EFBIG. 334 */ 335 int 336 sglist_append_uio(struct sglist *sg, struct uio *uio) 337 { 338 struct iovec *iov; 339 struct sgsave save; 340 size_t resid, minlen; 341 pmap_t pmap; 342 int error, i; 343 344 if (sg->sg_maxseg == 0) 345 return (EINVAL); 346 347 resid = uio->uio_resid; 348 iov = uio->uio_iov; 349 350 if (uio->uio_segflg == UIO_USERSPACE) { 351 KASSERT(uio->uio_td != NULL, 352 ("sglist_append_uio: USERSPACE but no thread")); 353 pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace); 354 } else 355 pmap = NULL; 356 357 error = 0; 358 SGLIST_SAVE(sg, save); 359 for (i = 0; i < uio->uio_iovcnt && resid != 0; i++) { 360 /* 361 * Now at the first iovec to load. Load each iovec 362 * until we have exhausted the residual count. 363 */ 364 minlen = MIN(resid, iov[i].iov_len); 365 if (minlen > 0) { 366 error = _sglist_append_buf(sg, iov[i].iov_base, minlen, 367 pmap, NULL); 368 if (error) { 369 SGLIST_RESTORE(sg, save); 370 return (error); 371 } 372 resid -= minlen; 373 } 374 } 375 return (0); 376 } 377 378 /* 379 * Append the segments that describe at most 'resid' bytes from a 380 * single uio to a scatter/gather list. If there are insufficient 381 * segments, then only the amount that fits is appended. 382 */ 383 int 384 sglist_consume_uio(struct sglist *sg, struct uio *uio, size_t resid) 385 { 386 struct iovec *iov; 387 size_t done; 388 pmap_t pmap; 389 int error, len; 390 391 if (sg->sg_maxseg == 0) 392 return (EINVAL); 393 394 if (uio->uio_segflg == UIO_USERSPACE) { 395 KASSERT(uio->uio_td != NULL, 396 ("sglist_consume_uio: USERSPACE but no thread")); 397 pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace); 398 } else 399 pmap = NULL; 400 401 error = 0; 402 while (resid > 0 && uio->uio_resid) { 403 iov = uio->uio_iov; 404 len = iov->iov_len; 405 if (len == 0) { 406 uio->uio_iov++; 407 uio->uio_iovcnt--; 408 continue; 409 } 410 if (len > resid) 411 len = resid; 412 413 /* 414 * Try to append this iovec. If we run out of room, 415 * then break out of the loop. 416 */ 417 error = _sglist_append_buf(sg, iov->iov_base, len, pmap, &done); 418 iov->iov_base = (char *)iov->iov_base + done; 419 iov->iov_len -= done; 420 uio->uio_resid -= done; 421 uio->uio_offset += done; 422 resid -= done; 423 if (error) 424 break; 425 } 426 return (0); 427 } 428 429 /* 430 * Allocate and populate a scatter/gather list to describe a single 431 * kernel virtual address range. 432 */ 433 struct sglist * 434 sglist_build(void *buf, size_t len, int mflags) 435 { 436 struct sglist *sg; 437 int nsegs; 438 439 if (len == 0) 440 return (NULL); 441 442 nsegs = sglist_count(buf, len); 443 sg = sglist_alloc(nsegs, mflags); 444 if (sg == NULL) 445 return (NULL); 446 if (sglist_append(sg, buf, len) != 0) { 447 sglist_free(sg); 448 return (NULL); 449 } 450 return (sg); 451 } 452 453 /* 454 * Clone a new copy of a scatter/gather list. 455 */ 456 struct sglist * 457 sglist_clone(struct sglist *sg, int mflags) 458 { 459 struct sglist *new; 460 461 if (sg == NULL) 462 return (NULL); 463 new = sglist_alloc(sg->sg_maxseg, mflags); 464 if (new == NULL) 465 return (NULL); 466 new->sg_nseg = sg->sg_nseg; 467 bcopy(sg->sg_segs, new->sg_segs, sizeof(struct sglist_seg) * 468 sg->sg_nseg); 469 return (new); 470 } 471 472 /* 473 * Calculate the total length of the segments described in a 474 * scatter/gather list. 475 */ 476 size_t 477 sglist_length(struct sglist *sg) 478 { 479 size_t space; 480 int i; 481 482 space = 0; 483 for (i = 0; i < sg->sg_nseg; i++) 484 space += sg->sg_segs[i].ss_len; 485 return (space); 486 } 487 488 /* 489 * Split a scatter/gather list into two lists. The scatter/gather 490 * entries for the first 'length' bytes of the 'original' list are 491 * stored in the '*head' list and are removed from 'original'. 492 * 493 * If '*head' is NULL, then a new list will be allocated using 494 * 'mflags'. If M_NOWAIT is specified and the allocation fails, 495 * ENOMEM will be returned. 496 * 497 * If '*head' is not NULL, it should point to an empty sglist. If it 498 * does not have enough room for the remaining space, then EFBIG will 499 * be returned. If '*head' is not empty, then EINVAL will be 500 * returned. 501 * 502 * If 'original' is shared (refcount > 1), then EDOOFUS will be 503 * returned. 504 */ 505 int 506 sglist_split(struct sglist *original, struct sglist **head, size_t length, 507 int mflags) 508 { 509 struct sglist *sg; 510 size_t space, split; 511 int count, i; 512 513 if (original->sg_refs > 1) 514 return (EDOOFUS); 515 516 /* Figure out how big of a sglist '*head' has to hold. */ 517 count = 0; 518 space = 0; 519 split = 0; 520 for (i = 0; i < original->sg_nseg; i++) { 521 space += original->sg_segs[i].ss_len; 522 count++; 523 if (space >= length) { 524 /* 525 * If 'length' falls in the middle of a 526 * scatter/gather list entry, then 'split' 527 * holds how much of that entry will remain in 528 * 'original'. 529 */ 530 split = space - length; 531 break; 532 } 533 } 534 535 /* Nothing to do, so leave head empty. */ 536 if (count == 0) 537 return (0); 538 539 if (*head == NULL) { 540 sg = sglist_alloc(count, mflags); 541 if (sg == NULL) 542 return (ENOMEM); 543 *head = sg; 544 } else { 545 sg = *head; 546 if (sg->sg_maxseg < count) 547 return (EFBIG); 548 if (sg->sg_nseg != 0) 549 return (EINVAL); 550 } 551 552 /* Copy 'count' entries to 'sg' from 'original'. */ 553 bcopy(original->sg_segs, sg->sg_segs, count * 554 sizeof(struct sglist_seg)); 555 sg->sg_nseg = count; 556 557 /* 558 * If we had to split a list entry, fixup the last entry in 559 * 'sg' and the new first entry in 'original'. We also 560 * decrement 'count' by 1 since we will only be removing 561 * 'count - 1' segments from 'original' now. 562 */ 563 if (split != 0) { 564 count--; 565 sg->sg_segs[count].ss_len -= split; 566 original->sg_segs[count].ss_paddr = 567 sg->sg_segs[count].ss_paddr + split; 568 original->sg_segs[count].ss_len = split; 569 } 570 571 /* Trim 'count' entries from the front of 'original'. */ 572 original->sg_nseg -= count; 573 bcopy(original->sg_segs + count, original->sg_segs, count * 574 sizeof(struct sglist_seg)); 575 return (0); 576 } 577 578 /* 579 * Append the scatter/gather list elements in 'second' to the 580 * scatter/gather list 'first'. If there is not enough space in 581 * 'first', EFBIG is returned. 582 */ 583 int 584 sglist_join(struct sglist *first, struct sglist *second) 585 { 586 struct sglist_seg *flast, *sfirst; 587 int append; 588 589 /* If 'second' is empty, there is nothing to do. */ 590 if (second->sg_nseg == 0) 591 return (0); 592 593 /* 594 * If the first entry in 'second' can be appended to the last entry 595 * in 'first' then set append to '1'. 596 */ 597 append = 0; 598 flast = &first->sg_segs[first->sg_nseg - 1]; 599 sfirst = &second->sg_segs[0]; 600 if (first->sg_nseg != 0 && 601 flast->ss_paddr + flast->ss_len == sfirst->ss_paddr) 602 append = 1; 603 604 /* Make sure 'first' has enough room. */ 605 if (first->sg_nseg + second->sg_nseg - append > first->sg_maxseg) 606 return (EFBIG); 607 608 /* Merge last in 'first' and first in 'second' if needed. */ 609 if (append) 610 flast->ss_len += sfirst->ss_len; 611 612 /* Append new segments from 'second' to 'first'. */ 613 bcopy(first->sg_segs + first->sg_nseg, second->sg_segs + append, 614 (second->sg_nseg - append) * sizeof(struct sglist_seg)); 615 first->sg_nseg += second->sg_nseg - append; 616 sglist_reset(second); 617 return (0); 618 } 619 620 /* 621 * Generate a new scatter/gather list from a range of an existing 622 * scatter/gather list. The 'offset' and 'length' parameters specify 623 * the logical range of the 'original' list to extract. If that range 624 * is not a subset of the length of 'original', then EINVAL is 625 * returned. The new scatter/gather list is stored in '*slice'. 626 * 627 * If '*slice' is NULL, then a new list will be allocated using 628 * 'mflags'. If M_NOWAIT is specified and the allocation fails, 629 * ENOMEM will be returned. 630 * 631 * If '*slice' is not NULL, it should point to an empty sglist. If it 632 * does not have enough room for the remaining space, then EFBIG will 633 * be returned. If '*slice' is not empty, then EINVAL will be 634 * returned. 635 */ 636 int 637 sglist_slice(struct sglist *original, struct sglist **slice, size_t offset, 638 size_t length, int mflags) 639 { 640 struct sglist *sg; 641 size_t space, end, foffs, loffs; 642 int count, i, fseg; 643 644 /* Nothing to do. */ 645 if (length == 0) 646 return (0); 647 648 /* Figure out how many segments '*slice' needs to have. */ 649 end = offset + length; 650 space = 0; 651 count = 0; 652 fseg = 0; 653 foffs = loffs = 0; 654 for (i = 0; i < original->sg_nseg; i++) { 655 space += original->sg_segs[i].ss_len; 656 if (space > offset) { 657 /* 658 * When we hit the first segment, store its index 659 * in 'fseg' and the offset into the first segment 660 * of 'offset' in 'foffs'. 661 */ 662 if (count == 0) { 663 fseg = i; 664 foffs = offset - (space - 665 original->sg_segs[i].ss_len); 666 } 667 count++; 668 669 /* 670 * When we hit the last segment, break out of 671 * the loop. Store the amount of extra space 672 * at the end of this segment in 'loffs'. 673 */ 674 if (space >= end) { 675 loffs = space - end; 676 break; 677 } 678 } 679 } 680 681 /* If we never hit 'end', then 'length' ran off the end, so fail. */ 682 if (space < end) 683 return (EINVAL); 684 685 if (*slice == NULL) { 686 sg = sglist_alloc(count, mflags); 687 if (sg == NULL) 688 return (ENOMEM); 689 *slice = sg; 690 } else { 691 sg = *slice; 692 if (sg->sg_maxseg < count) 693 return (EFBIG); 694 if (sg->sg_nseg != 0) 695 return (EINVAL); 696 } 697 698 /* 699 * Copy over 'count' segments from 'original' starting at 700 * 'fseg' to 'sg'. 701 */ 702 bcopy(original->sg_segs + fseg, sg->sg_segs, 703 count * sizeof(struct sglist_seg)); 704 sg->sg_nseg = count; 705 706 /* Fixup first and last segments if needed. */ 707 if (foffs != 0) { 708 sg->sg_segs[0].ss_paddr += foffs; 709 sg->sg_segs[0].ss_len -= foffs; 710 } 711 if (loffs != 0) { 712 sg->sg_segs[count - 1].ss_len -= loffs; 713 } 714 return (0); 715 } 716