1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2012-2015 Ian Lepore 5 * Copyright (c) 2010 Mark Tinguely 6 * Copyright (c) 2004 Olivier Houchard 7 * Copyright (c) 2002 Peter Grehan 8 * Copyright (c) 1997, 1998 Justin T. Gibbs. 9 * All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions, and the following disclaimer, 16 * without modification, immediately at the beginning of the file. 17 * 2. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 24 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * From i386/busdma_machdep.c 191438 2009-04-23 20:24:19Z jhb 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/malloc.h> 41 #include <sys/bus.h> 42 #include <sys/busdma_bufalloc.h> 43 #include <sys/counter.h> 44 #include <sys/interrupt.h> 45 #include <sys/kernel.h> 46 #include <sys/ktr.h> 47 #include <sys/lock.h> 48 #include <sys/memdesc.h> 49 #include <sys/proc.h> 50 #include <sys/mutex.h> 51 #include <sys/sysctl.h> 52 #include <sys/uio.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_param.h> 56 #include <vm/vm_page.h> 57 #include <vm/vm_phys.h> 58 #include <vm/vm_map.h> 59 #include <vm/vm_extern.h> 60 #include <vm/vm_kern.h> 61 62 #include <machine/atomic.h> 63 #include <machine/bus.h> 64 #include <machine/cpu.h> 65 #include <machine/md_var.h> 66 67 //#define ARM_BUSDMA_MAPLOAD_STATS 68 69 #define BUSDMA_DCACHE_ALIGN cpuinfo.dcache_line_size 70 #define BUSDMA_DCACHE_MASK cpuinfo.dcache_line_mask 71 72 #define MAX_BPAGES 64 73 #define MAX_DMA_SEGMENTS 4096 74 #define BUS_DMA_EXCL_BOUNCE BUS_DMA_BUS2 75 #define BUS_DMA_ALIGN_BOUNCE BUS_DMA_BUS3 76 #define BUS_DMA_COULD_BOUNCE (BUS_DMA_EXCL_BOUNCE | BUS_DMA_ALIGN_BOUNCE) 77 #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 78 79 struct bounce_page; 80 struct bounce_zone; 81 82 struct bus_dma_tag { 83 bus_dma_tag_t parent; 84 bus_size_t alignment; 85 bus_addr_t boundary; 86 bus_addr_t lowaddr; 87 bus_addr_t highaddr; 88 bus_dma_filter_t *filter; 89 void *filterarg; 90 bus_size_t maxsize; 91 u_int nsegments; 92 bus_size_t maxsegsz; 93 int flags; 94 int ref_count; 95 int map_count; 96 bus_dma_lock_t *lockfunc; 97 void *lockfuncarg; 98 struct bounce_zone *bounce_zone; 99 }; 100 101 struct sync_list { 102 vm_offset_t vaddr; /* kva of client data */ 103 bus_addr_t paddr; /* physical address */ 104 vm_page_t pages; /* starting page of client data */ 105 bus_size_t datacount; /* client data count */ 106 }; 107 108 static uint32_t tags_total; 109 static uint32_t maps_total; 110 static uint32_t maps_dmamem; 111 static uint32_t maps_coherent; 112 #ifdef ARM_BUSDMA_MAPLOAD_STATS 113 static counter_u64_t maploads_total; 114 static counter_u64_t maploads_bounced; 115 static counter_u64_t maploads_coherent; 116 static counter_u64_t maploads_dmamem; 117 static counter_u64_t maploads_mbuf; 118 static counter_u64_t maploads_physmem; 119 #endif 120 121 SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 122 "Busdma parameters"); 123 SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0, 124 "Number of active tags"); 125 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0, 126 "Number of active maps"); 127 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0, 128 "Number of active maps for bus_dmamem_alloc buffers"); 129 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0, 130 "Number of active maps with BUS_DMA_COHERENT flag set"); 131 #ifdef ARM_BUSDMA_MAPLOAD_STATS 132 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD, 133 &maploads_total, "Number of load operations performed"); 134 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD, 135 &maploads_bounced, "Number of load operations that used bounce buffers"); 136 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD, 137 &maploads_dmamem, "Number of load operations on BUS_DMA_COHERENT memory"); 138 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD, 139 &maploads_dmamem, "Number of load operations on bus_dmamem_alloc buffers"); 140 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD, 141 &maploads_mbuf, "Number of load operations for mbufs"); 142 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD, 143 &maploads_physmem, "Number of load operations on physical buffers"); 144 #endif 145 146 struct bus_dmamap { 147 STAILQ_HEAD(, bounce_page) bpages; 148 int pagesneeded; 149 int pagesreserved; 150 bus_dma_tag_t dmat; 151 struct memdesc mem; 152 bus_dmamap_callback_t *callback; 153 void *callback_arg; 154 int flags; 155 #define DMAMAP_COHERENT (1 << 0) 156 #define DMAMAP_DMAMEM_ALLOC (1 << 1) 157 #define DMAMAP_MBUF (1 << 2) 158 STAILQ_ENTRY(bus_dmamap) links; 159 bus_dma_segment_t *segments; 160 int sync_count; 161 struct sync_list slist[]; 162 }; 163 164 static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, 165 bus_dmamap_t map, void *buf, bus_size_t buflen, int flags); 166 static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, 167 vm_paddr_t buf, bus_size_t buflen, int flags); 168 static void dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size); 169 static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op); 170 171 static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */ 172 static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */ 173 174 MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata"); 175 176 #define dmat_alignment(dmat) ((dmat)->alignment) 177 #define dmat_flags(dmat) ((dmat)->flags) 178 #define dmat_lowaddr(dmat) ((dmat)->lowaddr) 179 #define dmat_lockfunc(dmat) ((dmat)->lockfunc) 180 #define dmat_lockfuncarg(dmat) ((dmat)->lockfuncarg) 181 182 #include "../../kern/subr_busdma_bounce.c" 183 184 static void 185 busdma_init(void *dummy) 186 { 187 int uma_flags; 188 189 #ifdef ARM_BUSDMA_MAPLOAD_STATS 190 maploads_total = counter_u64_alloc(M_WAITOK); 191 maploads_bounced = counter_u64_alloc(M_WAITOK); 192 maploads_coherent = counter_u64_alloc(M_WAITOK); 193 maploads_dmamem = counter_u64_alloc(M_WAITOK); 194 maploads_mbuf = counter_u64_alloc(M_WAITOK); 195 maploads_physmem = counter_u64_alloc(M_WAITOK); 196 #endif 197 198 uma_flags = 0; 199 200 /* Create a cache of buffers in standard (cacheable) memory. */ 201 standard_allocator = busdma_bufalloc_create("buffer", 202 BUSDMA_DCACHE_ALIGN,/* minimum_alignment */ 203 NULL, /* uma_alloc func */ 204 NULL, /* uma_free func */ 205 uma_flags); /* uma_zcreate_flags */ 206 207 #ifdef INVARIANTS 208 /* 209 * Force UMA zone to allocate service structures like 210 * slabs using own allocator. uma_debug code performs 211 * atomic ops on uma_slab_t fields and safety of this 212 * operation is not guaranteed for write-back caches 213 */ 214 uma_flags = UMA_ZONE_NOTOUCH; 215 #endif 216 /* 217 * Create a cache of buffers in uncacheable memory, to implement the 218 * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag. 219 */ 220 coherent_allocator = busdma_bufalloc_create("coherent", 221 BUSDMA_DCACHE_ALIGN,/* minimum_alignment */ 222 busdma_bufalloc_alloc_uncacheable, 223 busdma_bufalloc_free_uncacheable, 224 uma_flags); /* uma_zcreate_flags */ 225 } 226 227 /* 228 * This init historically used SI_SUB_VM, but now the init code requires 229 * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get 230 * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by 231 * using SI_SUB_KMEM+1. 232 */ 233 SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL); 234 235 /* 236 * This routine checks the exclusion zone constraints from a tag against the 237 * physical RAM available on the machine. If a tag specifies an exclusion zone 238 * but there's no RAM in that zone, then we avoid allocating resources to bounce 239 * a request, and we can use any memory allocator (as opposed to needing 240 * kmem_alloc_contig() just because it can allocate pages in an address range). 241 * 242 * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the 243 * same value on 32-bit architectures) as their lowaddr constraint, and we can't 244 * possibly have RAM at an address higher than the highest address we can 245 * express, so we take a fast out. 246 */ 247 static int 248 exclusion_bounce_check(vm_offset_t lowaddr, vm_offset_t highaddr) 249 { 250 int i; 251 252 if (lowaddr >= BUS_SPACE_MAXADDR) 253 return (0); 254 255 for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { 256 if ((lowaddr >= phys_avail[i] && lowaddr < phys_avail[i + 1]) || 257 (lowaddr < phys_avail[i] && highaddr >= phys_avail[i])) 258 return (1); 259 } 260 return (0); 261 } 262 263 /* 264 * Return true if the tag has an exclusion zone that could lead to bouncing. 265 */ 266 static __inline int 267 exclusion_bounce(bus_dma_tag_t dmat) 268 { 269 270 return (dmat->flags & BUS_DMA_EXCL_BOUNCE); 271 } 272 273 /* 274 * Return true if the given address does not fall on the alignment boundary. 275 */ 276 static __inline int 277 alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr) 278 { 279 280 return (!vm_addr_align_ok(addr, dmat->alignment)); 281 } 282 283 /* 284 * Return true if the DMA should bounce because the start or end does not fall 285 * on a cacheline boundary (which would require a partial cacheline flush). 286 * COHERENT memory doesn't trigger cacheline flushes. Memory allocated by 287 * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a 288 * strict rule that such memory cannot be accessed by the CPU while DMA is in 289 * progress (or by multiple DMA engines at once), so that it's always safe to do 290 * full cacheline flushes even if that affects memory outside the range of a 291 * given DMA operation that doesn't involve the full allocated buffer. If we're 292 * mapping an mbuf, that follows the same rules as a buffer we allocated. 293 */ 294 static __inline int 295 cacheline_bounce(bus_dmamap_t map, bus_addr_t addr, bus_size_t size) 296 { 297 298 if (map->flags & (DMAMAP_DMAMEM_ALLOC | DMAMAP_COHERENT | DMAMAP_MBUF)) 299 return (0); 300 return ((addr | size) & BUSDMA_DCACHE_MASK); 301 } 302 303 /* 304 * Return true if we might need to bounce the DMA described by addr and size. 305 * 306 * This is used to quick-check whether we need to do the more expensive work of 307 * checking the DMA page-by-page looking for alignment and exclusion bounces. 308 * 309 * Note that the addr argument might be either virtual or physical. It doesn't 310 * matter because we only look at the low-order bits, which are the same in both 311 * address spaces and maximum alignment of generic buffer is limited up to page 312 * size. 313 * Bouncing of buffers allocated by bus_dmamem_alloc()is not necessary, these 314 * always comply with the required rules (alignment, boundary, and address 315 * range). 316 */ 317 static __inline int 318 might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr, 319 bus_size_t size) 320 { 321 322 KASSERT(map->flags & DMAMAP_DMAMEM_ALLOC || 323 dmat->alignment <= PAGE_SIZE, 324 ("%s: unsupported alignment (0x%08lx) for buffer not " 325 "allocated by bus_dmamem_alloc()", 326 __func__, dmat->alignment)); 327 328 return (!(map->flags & DMAMAP_DMAMEM_ALLOC) && 329 ((dmat->flags & BUS_DMA_EXCL_BOUNCE) || 330 alignment_bounce(dmat, addr) || 331 cacheline_bounce(map, addr, size))); 332 } 333 334 /* 335 * Return true if we must bounce the DMA described by paddr and size. 336 * 337 * Bouncing can be triggered by DMA that doesn't begin and end on cacheline 338 * boundaries, or doesn't begin on an alignment boundary, or falls within the 339 * exclusion zone of any tag in the ancestry chain. 340 * 341 * For exclusions, walk the chain of tags comparing paddr to the exclusion zone 342 * within each tag. If the tag has a filter function, use it to decide whether 343 * the DMA needs to bounce, otherwise any DMA within the zone bounces. 344 */ 345 static int 346 must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr, 347 bus_size_t size) 348 { 349 350 if (cacheline_bounce(map, paddr, size)) 351 return (1); 352 353 /* 354 * The tag already contains ancestors' alignment restrictions so this 355 * check doesn't need to be inside the loop. 356 */ 357 if (alignment_bounce(dmat, paddr)) 358 return (1); 359 360 /* 361 * Even though each tag has an exclusion zone that is a superset of its 362 * own and all its ancestors' exclusions, the exclusion zone of each tag 363 * up the chain must be checked within the loop, because the busdma 364 * rules say the filter function is called only when the address lies 365 * within the low-highaddr range of the tag that filterfunc belongs to. 366 */ 367 while (dmat != NULL && exclusion_bounce(dmat)) { 368 if ((paddr >= dmat->lowaddr && paddr <= dmat->highaddr) && 369 (dmat->filter == NULL || 370 dmat->filter(dmat->filterarg, paddr) != 0)) 371 return (1); 372 dmat = dmat->parent; 373 } 374 375 return (0); 376 } 377 378 /* 379 * Allocate a device specific dma_tag. 380 */ 381 int 382 bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, 383 bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, 384 bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, 385 int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, 386 void *lockfuncarg, bus_dma_tag_t *dmat) 387 { 388 bus_dma_tag_t newtag; 389 int error = 0; 390 391 /* Basic sanity checking. */ 392 KASSERT(boundary == 0 || powerof2(boundary), 393 ("dma tag boundary %lu, must be a power of 2", boundary)); 394 KASSERT(boundary == 0 || boundary >= maxsegsz, 395 ("dma tag boundary %lu is < maxsegsz %lu\n", boundary, maxsegsz)); 396 KASSERT(alignment != 0 && powerof2(alignment), 397 ("dma tag alignment %lu, must be non-zero power of 2", alignment)); 398 KASSERT(maxsegsz != 0, ("dma tag maxsegsz must not be zero")); 399 400 /* Return a NULL tag on failure */ 401 *dmat = NULL; 402 403 newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, 404 M_ZERO | M_NOWAIT); 405 if (newtag == NULL) { 406 CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", 407 __func__, newtag, 0, error); 408 return (ENOMEM); 409 } 410 411 newtag->parent = parent; 412 newtag->alignment = alignment; 413 newtag->boundary = boundary; 414 newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); 415 newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + 416 (PAGE_SIZE - 1); 417 newtag->filter = filter; 418 newtag->filterarg = filterarg; 419 newtag->maxsize = maxsize; 420 newtag->nsegments = nsegments; 421 newtag->maxsegsz = maxsegsz; 422 newtag->flags = flags; 423 newtag->ref_count = 1; /* Count ourself */ 424 newtag->map_count = 0; 425 if (lockfunc != NULL) { 426 newtag->lockfunc = lockfunc; 427 newtag->lockfuncarg = lockfuncarg; 428 } else { 429 newtag->lockfunc = _busdma_dflt_lock; 430 newtag->lockfuncarg = NULL; 431 } 432 433 /* Take into account any restrictions imposed by our parent tag */ 434 if (parent != NULL) { 435 newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); 436 newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); 437 newtag->alignment = MAX(parent->alignment, newtag->alignment); 438 newtag->flags |= parent->flags & BUS_DMA_COULD_BOUNCE; 439 newtag->flags |= parent->flags & BUS_DMA_COHERENT; 440 if (newtag->boundary == 0) 441 newtag->boundary = parent->boundary; 442 else if (parent->boundary != 0) 443 newtag->boundary = MIN(parent->boundary, 444 newtag->boundary); 445 if (newtag->filter == NULL) { 446 /* 447 * Short circuit to looking at our parent directly 448 * since we have encapsulated all of its information 449 */ 450 newtag->filter = parent->filter; 451 newtag->filterarg = parent->filterarg; 452 newtag->parent = parent->parent; 453 } 454 if (newtag->parent != NULL) 455 atomic_add_int(&parent->ref_count, 1); 456 } 457 458 if (exclusion_bounce_check(newtag->lowaddr, newtag->highaddr)) 459 newtag->flags |= BUS_DMA_EXCL_BOUNCE; 460 if (alignment_bounce(newtag, 1)) 461 newtag->flags |= BUS_DMA_ALIGN_BOUNCE; 462 463 /* 464 * Any request can auto-bounce due to cacheline alignment, in addition 465 * to any alignment or boundary specifications in the tag, so if the 466 * ALLOCNOW flag is set, there's always work to do. 467 */ 468 if ((flags & BUS_DMA_ALLOCNOW) != 0) { 469 struct bounce_zone *bz; 470 /* 471 * Round size up to a full page, and add one more page because 472 * there can always be one more boundary crossing than the 473 * number of pages in a transfer. 474 */ 475 maxsize = roundup2(maxsize, PAGE_SIZE) + PAGE_SIZE; 476 477 if ((error = alloc_bounce_zone(newtag)) != 0) { 478 free(newtag, M_BUSDMA); 479 return (error); 480 } 481 bz = newtag->bounce_zone; 482 483 if (ptoa(bz->total_bpages) < maxsize) { 484 int pages; 485 486 pages = atop(maxsize) - bz->total_bpages; 487 488 /* Add pages to our bounce pool */ 489 if (alloc_bounce_pages(newtag, pages) < pages) 490 error = ENOMEM; 491 } 492 /* Performed initial allocation */ 493 newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; 494 } else 495 newtag->bounce_zone = NULL; 496 497 if (error != 0) { 498 free(newtag, M_BUSDMA); 499 } else { 500 atomic_add_32(&tags_total, 1); 501 *dmat = newtag; 502 } 503 CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", 504 __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); 505 return (error); 506 } 507 508 void 509 bus_dma_template_clone(bus_dma_template_t *t, bus_dma_tag_t dmat) 510 { 511 512 if (t == NULL || dmat == NULL) 513 return; 514 515 t->parent = dmat->parent; 516 t->alignment = dmat->alignment; 517 t->boundary = dmat->boundary; 518 t->lowaddr = dmat->lowaddr; 519 t->highaddr = dmat->highaddr; 520 t->maxsize = dmat->maxsize; 521 t->nsegments = dmat->nsegments; 522 t->maxsegsize = dmat->maxsegsz; 523 t->flags = dmat->flags; 524 t->lockfunc = dmat->lockfunc; 525 t->lockfuncarg = dmat->lockfuncarg; 526 } 527 528 int 529 bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain) 530 { 531 532 return (0); 533 } 534 535 int 536 bus_dma_tag_destroy(bus_dma_tag_t dmat) 537 { 538 #ifdef KTR 539 bus_dma_tag_t dmat_copy = dmat; 540 #endif 541 int error; 542 543 error = 0; 544 545 if (dmat != NULL) { 546 if (dmat->map_count != 0) { 547 error = EBUSY; 548 goto out; 549 } 550 551 while (dmat != NULL) { 552 bus_dma_tag_t parent; 553 554 parent = dmat->parent; 555 atomic_subtract_int(&dmat->ref_count, 1); 556 if (dmat->ref_count == 0) { 557 atomic_subtract_32(&tags_total, 1); 558 free(dmat, M_BUSDMA); 559 /* 560 * Last reference count, so 561 * release our reference 562 * count on our parent. 563 */ 564 dmat = parent; 565 } else 566 dmat = NULL; 567 } 568 } 569 out: 570 CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); 571 return (error); 572 } 573 574 static int 575 allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t mapp) 576 { 577 struct bounce_zone *bz; 578 int maxpages; 579 int error; 580 581 if (dmat->bounce_zone == NULL) 582 if ((error = alloc_bounce_zone(dmat)) != 0) 583 return (error); 584 bz = dmat->bounce_zone; 585 /* Initialize the new map */ 586 STAILQ_INIT(&(mapp->bpages)); 587 588 /* 589 * Attempt to add pages to our pool on a per-instance basis up to a sane 590 * limit. Even if the tag isn't flagged as COULD_BOUNCE due to 591 * alignment and boundary constraints, it could still auto-bounce due to 592 * cacheline alignment, which requires at most two bounce pages. 593 */ 594 if (dmat->flags & BUS_DMA_COULD_BOUNCE) 595 maxpages = MAX_BPAGES; 596 else 597 maxpages = 2 * bz->map_count; 598 if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || 599 (bz->map_count > 0 && bz->total_bpages < maxpages)) { 600 int pages; 601 602 pages = atop(roundup2(dmat->maxsize, PAGE_SIZE)) + 1; 603 pages = MIN(maxpages - bz->total_bpages, pages); 604 pages = MAX(pages, 2); 605 if (alloc_bounce_pages(dmat, pages) < pages) 606 return (ENOMEM); 607 608 if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) 609 dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; 610 } 611 bz->map_count++; 612 return (0); 613 } 614 615 static bus_dmamap_t 616 allocate_map(bus_dma_tag_t dmat, int mflags) 617 { 618 int mapsize, segsize; 619 bus_dmamap_t map; 620 621 /* 622 * Allocate the map. The map structure ends with an embedded 623 * variable-sized array of sync_list structures. Following that 624 * we allocate enough extra space to hold the array of bus_dma_segments. 625 */ 626 KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS, 627 ("cannot allocate %u dma segments (max is %u)", 628 dmat->nsegments, MAX_DMA_SEGMENTS)); 629 segsize = sizeof(struct bus_dma_segment) * dmat->nsegments; 630 mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments; 631 map = malloc(mapsize + segsize, M_BUSDMA, mflags | M_ZERO); 632 if (map == NULL) { 633 CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); 634 return (NULL); 635 } 636 map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize); 637 STAILQ_INIT(&map->bpages); 638 return (map); 639 } 640 641 /* 642 * Allocate a handle for mapping from kva/uva/physical 643 * address space into bus device space. 644 */ 645 int 646 bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) 647 { 648 bus_dmamap_t map; 649 int error = 0; 650 651 *mapp = map = allocate_map(dmat, M_NOWAIT); 652 if (map == NULL) { 653 CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); 654 return (ENOMEM); 655 } 656 657 /* 658 * Bouncing might be required if the driver asks for an exclusion 659 * region, a data alignment that is stricter than 1, or DMA that begins 660 * or ends with a partial cacheline. Whether bouncing will actually 661 * happen can't be known until mapping time, but we need to pre-allocate 662 * resources now because we might not be allowed to at mapping time. 663 */ 664 error = allocate_bz_and_pages(dmat, map); 665 if (error != 0) { 666 free(map, M_BUSDMA); 667 *mapp = NULL; 668 return (error); 669 } 670 if (map->flags & DMAMAP_COHERENT) 671 atomic_add_32(&maps_coherent, 1); 672 atomic_add_32(&maps_total, 1); 673 dmat->map_count++; 674 675 return (0); 676 } 677 678 /* 679 * Destroy a handle for mapping from kva/uva/physical 680 * address space into bus device space. 681 */ 682 int 683 bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) 684 { 685 686 if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { 687 CTR3(KTR_BUSDMA, "%s: tag %p error %d", 688 __func__, dmat, EBUSY); 689 return (EBUSY); 690 } 691 if (dmat->bounce_zone) 692 dmat->bounce_zone->map_count--; 693 if (map->flags & DMAMAP_COHERENT) 694 atomic_subtract_32(&maps_coherent, 1); 695 atomic_subtract_32(&maps_total, 1); 696 free(map, M_BUSDMA); 697 dmat->map_count--; 698 CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); 699 return (0); 700 } 701 702 /* 703 * Allocate a piece of memory that can be efficiently mapped into bus device 704 * space based on the constraints listed in the dma tag. Returns a pointer to 705 * the allocated memory, and a pointer to an associated bus_dmamap. 706 */ 707 int 708 bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags, 709 bus_dmamap_t *mapp) 710 { 711 busdma_bufalloc_t ba; 712 struct busdma_bufzone *bufzone; 713 bus_dmamap_t map; 714 vm_memattr_t memattr; 715 int mflags; 716 717 if (flags & BUS_DMA_NOWAIT) 718 mflags = M_NOWAIT; 719 else 720 mflags = M_WAITOK; 721 if (flags & BUS_DMA_ZERO) 722 mflags |= M_ZERO; 723 724 *mapp = map = allocate_map(dmat, mflags); 725 if (map == NULL) { 726 CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", 727 __func__, dmat, dmat->flags, ENOMEM); 728 return (ENOMEM); 729 } 730 map->flags = DMAMAP_DMAMEM_ALLOC; 731 732 /* For coherent memory, set the map flag that disables sync ops. */ 733 if (flags & BUS_DMA_COHERENT) 734 map->flags |= DMAMAP_COHERENT; 735 736 /* 737 * Choose a busdma buffer allocator based on memory type flags. 738 * If the tag's COHERENT flag is set, that means normal memory 739 * is already coherent, use the normal allocator. 740 */ 741 if ((flags & BUS_DMA_COHERENT) && 742 ((dmat->flags & BUS_DMA_COHERENT) == 0)) { 743 memattr = VM_MEMATTR_UNCACHEABLE; 744 ba = coherent_allocator; 745 } else { 746 memattr = VM_MEMATTR_DEFAULT; 747 ba = standard_allocator; 748 } 749 750 /* 751 * Try to find a bufzone in the allocator that holds a cache of buffers 752 * of the right size for this request. If the buffer is too big to be 753 * held in the allocator cache, this returns NULL. 754 */ 755 bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); 756 757 /* 758 * Allocate the buffer from the uma(9) allocator if... 759 * - It's small enough to be in the allocator (bufzone not NULL). 760 * - The alignment constraint isn't larger than the allocation size 761 * (the allocator aligns buffers to their size boundaries). 762 * - There's no need to handle lowaddr/highaddr exclusion zones. 763 * else allocate non-contiguous pages if... 764 * - The page count that could get allocated doesn't exceed 765 * nsegments also when the maximum segment size is less 766 * than PAGE_SIZE. 767 * - The alignment constraint isn't larger than a page boundary. 768 * - There are no boundary-crossing constraints. 769 * else allocate a block of contiguous pages because one or more of the 770 * constraints is something that only the contig allocator can fulfill. 771 */ 772 if (bufzone != NULL && dmat->alignment <= bufzone->size && 773 !exclusion_bounce(dmat)) { 774 *vaddr = uma_zalloc(bufzone->umazone, mflags); 775 } else if (dmat->nsegments >= 776 howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) && 777 dmat->alignment <= PAGE_SIZE && 778 (dmat->boundary % PAGE_SIZE) == 0) { 779 *vaddr = kmem_alloc_attr(dmat->maxsize, mflags, 0, 780 dmat->lowaddr, memattr); 781 } else { 782 *vaddr = kmem_alloc_contig(dmat->maxsize, mflags, 0, 783 dmat->lowaddr, dmat->alignment, dmat->boundary, memattr); 784 } 785 if (*vaddr == NULL) { 786 CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", 787 __func__, dmat, dmat->flags, ENOMEM); 788 free(map, M_BUSDMA); 789 *mapp = NULL; 790 return (ENOMEM); 791 } 792 if (map->flags & DMAMAP_COHERENT) 793 atomic_add_32(&maps_coherent, 1); 794 atomic_add_32(&maps_dmamem, 1); 795 atomic_add_32(&maps_total, 1); 796 dmat->map_count++; 797 798 CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", 799 __func__, dmat, dmat->flags, 0); 800 return (0); 801 } 802 803 /* 804 * Free a piece of memory that was allocated via bus_dmamem_alloc, along with 805 * its associated map. 806 */ 807 void 808 bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) 809 { 810 struct busdma_bufzone *bufzone; 811 busdma_bufalloc_t ba; 812 813 if ((map->flags & DMAMAP_COHERENT) && 814 ((dmat->flags & BUS_DMA_COHERENT) == 0)) 815 ba = coherent_allocator; 816 else 817 ba = standard_allocator; 818 819 bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); 820 821 if (bufzone != NULL && dmat->alignment <= bufzone->size && 822 !exclusion_bounce(dmat)) 823 uma_zfree(bufzone->umazone, vaddr); 824 else 825 kmem_free(vaddr, dmat->maxsize); 826 827 dmat->map_count--; 828 if (map->flags & DMAMAP_COHERENT) 829 atomic_subtract_32(&maps_coherent, 1); 830 atomic_subtract_32(&maps_total, 1); 831 atomic_subtract_32(&maps_dmamem, 1); 832 free(map, M_BUSDMA); 833 CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); 834 } 835 836 static void 837 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, 838 bus_size_t buflen, int flags) 839 { 840 bus_addr_t curaddr; 841 bus_size_t sgsize; 842 843 if (map->pagesneeded == 0) { 844 CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d" 845 " map= %p, pagesneeded= %d", 846 dmat->lowaddr, dmat->boundary, dmat->alignment, 847 map, map->pagesneeded); 848 /* 849 * Count the number of bounce pages 850 * needed in order to complete this transfer 851 */ 852 curaddr = buf; 853 while (buflen != 0) { 854 sgsize = MIN(buflen, dmat->maxsegsz); 855 if (must_bounce(dmat, map, curaddr, sgsize) != 0) { 856 sgsize = MIN(sgsize, 857 PAGE_SIZE - (curaddr & PAGE_MASK)); 858 map->pagesneeded++; 859 } 860 curaddr += sgsize; 861 buflen -= sgsize; 862 } 863 CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded); 864 } 865 } 866 867 static void 868 _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map, 869 void *buf, bus_size_t buflen, int flags) 870 { 871 vm_offset_t vaddr; 872 vm_offset_t vendaddr; 873 bus_addr_t paddr; 874 875 if (map->pagesneeded == 0) { 876 CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d" 877 " map= %p, pagesneeded= %d", 878 dmat->lowaddr, dmat->boundary, dmat->alignment, 879 map, map->pagesneeded); 880 /* 881 * Count the number of bounce pages 882 * needed in order to complete this transfer 883 */ 884 vaddr = (vm_offset_t)buf; 885 vendaddr = (vm_offset_t)buf + buflen; 886 887 while (vaddr < vendaddr) { 888 if (__predict_true(pmap == kernel_pmap)) 889 paddr = pmap_kextract(vaddr); 890 else 891 paddr = pmap_extract(pmap, vaddr); 892 if (must_bounce(dmat, map, paddr, 893 min(vendaddr - vaddr, (PAGE_SIZE - ((vm_offset_t)vaddr & 894 PAGE_MASK)))) != 0) { 895 map->pagesneeded++; 896 } 897 vaddr += (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)); 898 } 899 CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded); 900 } 901 } 902 903 /* 904 * Add a single contiguous physical range to the segment list. 905 */ 906 static int 907 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, 908 bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) 909 { 910 int seg; 911 912 /* 913 * Make sure we don't cross any boundaries. 914 */ 915 if (!vm_addr_bound_ok(curaddr, sgsize, dmat->boundary)) 916 sgsize = roundup2(curaddr, dmat->boundary) - curaddr; 917 918 /* 919 * Insert chunk into a segment, coalescing with 920 * previous segment if possible. 921 */ 922 seg = *segp; 923 if (seg == -1) { 924 seg = 0; 925 segs[seg].ds_addr = curaddr; 926 segs[seg].ds_len = sgsize; 927 } else { 928 if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && 929 (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && 930 vm_addr_bound_ok(segs[seg].ds_addr, 931 segs[seg].ds_len + sgsize, dmat->boundary)) 932 segs[seg].ds_len += sgsize; 933 else { 934 if (++seg >= dmat->nsegments) 935 return (0); 936 segs[seg].ds_addr = curaddr; 937 segs[seg].ds_len = sgsize; 938 } 939 } 940 *segp = seg; 941 return (sgsize); 942 } 943 944 /* 945 * Utility function to load a physical buffer. segp contains 946 * the starting segment on entrace, and the ending segment on exit. 947 */ 948 int 949 _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, 950 bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) 951 { 952 bus_addr_t curaddr; 953 bus_addr_t sl_end = 0; 954 bus_size_t sgsize; 955 struct sync_list *sl; 956 int error; 957 958 if (segs == NULL) 959 segs = map->segments; 960 961 #ifdef ARM_BUSDMA_MAPLOAD_STATS 962 counter_u64_add(maploads_total, 1); 963 counter_u64_add(maploads_physmem, 1); 964 #endif 965 966 if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) { 967 _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); 968 if (map->pagesneeded != 0) { 969 #ifdef ARM_BUSDMA_MAPLOAD_STATS 970 counter_u64_add(maploads_bounced, 1); 971 #endif 972 error = _bus_dmamap_reserve_pages(dmat, map, flags); 973 if (error) 974 return (error); 975 } 976 } 977 978 sl = map->slist + map->sync_count - 1; 979 980 while (buflen > 0) { 981 curaddr = buf; 982 sgsize = MIN(buflen, dmat->maxsegsz); 983 if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, 984 sgsize)) { 985 sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); 986 curaddr = add_bounce_page(dmat, map, 0, curaddr, 987 sgsize); 988 } else if ((dmat->flags & BUS_DMA_COHERENT) == 0) { 989 if (map->sync_count > 0) 990 sl_end = sl->paddr + sl->datacount; 991 992 if (map->sync_count == 0 || curaddr != sl_end) { 993 if (++map->sync_count > dmat->nsegments) 994 break; 995 sl++; 996 sl->vaddr = 0; 997 sl->paddr = curaddr; 998 sl->datacount = sgsize; 999 sl->pages = PHYS_TO_VM_PAGE(curaddr); 1000 KASSERT(sl->pages != NULL, 1001 ("%s: page at PA:0x%08lx is not in " 1002 "vm_page_array", __func__, curaddr)); 1003 } else 1004 sl->datacount += sgsize; 1005 } 1006 sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, 1007 segp); 1008 if (sgsize == 0) 1009 break; 1010 buf += sgsize; 1011 buflen -= sgsize; 1012 } 1013 1014 /* 1015 * Did we fit? 1016 */ 1017 if (buflen != 0) { 1018 bus_dmamap_unload(dmat, map); 1019 return (EFBIG); /* XXX better return value here? */ 1020 } 1021 return (0); 1022 } 1023 1024 int 1025 _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, 1026 struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, 1027 bus_dma_segment_t *segs, int *segp) 1028 { 1029 1030 return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, 1031 segs, segp)); 1032 } 1033 1034 /* 1035 * Utility function to load a linear buffer. segp contains 1036 * the starting segment on entrance, and the ending segment on exit. 1037 */ 1038 int 1039 _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, 1040 bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, 1041 int *segp) 1042 { 1043 bus_size_t sgsize; 1044 bus_addr_t curaddr; 1045 bus_addr_t sl_pend = 0; 1046 vm_offset_t kvaddr, vaddr, sl_vend = 0; 1047 struct sync_list *sl; 1048 int error; 1049 1050 #ifdef ARM_BUSDMA_MAPLOAD_STATS 1051 counter_u64_add(maploads_total, 1); 1052 if (map->flags & DMAMAP_COHERENT) 1053 counter_u64_add(maploads_coherent, 1); 1054 if (map->flags & DMAMAP_DMAMEM_ALLOC) 1055 counter_u64_add(maploads_dmamem, 1); 1056 #endif 1057 1058 if (segs == NULL) 1059 segs = map->segments; 1060 1061 if (flags & BUS_DMA_LOAD_MBUF) { 1062 #ifdef ARM_BUSDMA_MAPLOAD_STATS 1063 counter_u64_add(maploads_mbuf, 1); 1064 #endif 1065 map->flags |= DMAMAP_MBUF; 1066 } 1067 1068 if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) { 1069 _bus_dmamap_count_pages(dmat, pmap, map, buf, buflen, flags); 1070 if (map->pagesneeded != 0) { 1071 #ifdef ARM_BUSDMA_MAPLOAD_STATS 1072 counter_u64_add(maploads_bounced, 1); 1073 #endif 1074 error = _bus_dmamap_reserve_pages(dmat, map, flags); 1075 if (error) 1076 return (error); 1077 } 1078 } 1079 1080 sl = map->slist + map->sync_count - 1; 1081 vaddr = (vm_offset_t)buf; 1082 1083 while (buflen > 0) { 1084 /* 1085 * Get the physical address for this segment. 1086 */ 1087 if (__predict_true(pmap == kernel_pmap)) { 1088 curaddr = pmap_kextract(vaddr); 1089 kvaddr = vaddr; 1090 } else { 1091 curaddr = pmap_extract(pmap, vaddr); 1092 kvaddr = 0; 1093 } 1094 1095 /* 1096 * Compute the segment size, and adjust counts. 1097 */ 1098 sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); 1099 if (sgsize > dmat->maxsegsz) 1100 sgsize = dmat->maxsegsz; 1101 if (buflen < sgsize) 1102 sgsize = buflen; 1103 1104 if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, 1105 sgsize)) { 1106 curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, 1107 sgsize); 1108 } else if ((dmat->flags & BUS_DMA_COHERENT) == 0) { 1109 if (map->sync_count > 0) { 1110 sl_pend = sl->paddr + sl->datacount; 1111 sl_vend = sl->vaddr + sl->datacount; 1112 } 1113 1114 if (map->sync_count == 0 || 1115 (kvaddr != 0 && kvaddr != sl_vend) || 1116 (curaddr != sl_pend)) { 1117 if (++map->sync_count > dmat->nsegments) 1118 goto cleanup; 1119 sl++; 1120 sl->vaddr = kvaddr; 1121 sl->paddr = curaddr; 1122 if (kvaddr != 0) { 1123 sl->pages = NULL; 1124 } else { 1125 sl->pages = PHYS_TO_VM_PAGE(curaddr); 1126 KASSERT(sl->pages != NULL, 1127 ("%s: page at PA:0x%08lx is not " 1128 "in vm_page_array", __func__, 1129 curaddr)); 1130 } 1131 sl->datacount = sgsize; 1132 } else 1133 sl->datacount += sgsize; 1134 } 1135 sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, 1136 segp); 1137 if (sgsize == 0) 1138 break; 1139 vaddr += sgsize; 1140 buflen -= sgsize; 1141 } 1142 1143 cleanup: 1144 /* 1145 * Did we fit? 1146 */ 1147 if (buflen != 0) { 1148 bus_dmamap_unload(dmat, map); 1149 return (EFBIG); /* XXX better return value here? */ 1150 } 1151 return (0); 1152 } 1153 1154 void 1155 _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, 1156 bus_dmamap_callback_t *callback, void *callback_arg) 1157 { 1158 1159 map->mem = *mem; 1160 map->dmat = dmat; 1161 map->callback = callback; 1162 map->callback_arg = callback_arg; 1163 } 1164 1165 bus_dma_segment_t * 1166 _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, 1167 bus_dma_segment_t *segs, int nsegs, int error) 1168 { 1169 1170 if (segs == NULL) 1171 segs = map->segments; 1172 return (segs); 1173 } 1174 1175 /* 1176 * Release the mapping held by map. 1177 */ 1178 void 1179 bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) 1180 { 1181 struct bounce_zone *bz; 1182 1183 if ((bz = dmat->bounce_zone) != NULL) { 1184 free_bounce_pages(dmat, map); 1185 1186 if (map->pagesreserved != 0) { 1187 mtx_lock(&bounce_lock); 1188 bz->free_bpages += map->pagesreserved; 1189 bz->reserved_bpages -= map->pagesreserved; 1190 mtx_unlock(&bounce_lock); 1191 map->pagesreserved = 0; 1192 } 1193 map->pagesneeded = 0; 1194 } 1195 map->sync_count = 0; 1196 map->flags &= ~DMAMAP_MBUF; 1197 } 1198 1199 static void 1200 dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size) 1201 { 1202 /* 1203 * Write back any partial cachelines immediately before and 1204 * after the DMA region. We don't need to round the address 1205 * down to the nearest cacheline or specify the exact size, 1206 * as dcache_wb_poc() will do the rounding for us and works 1207 * at cacheline granularity. 1208 */ 1209 if (va & BUSDMA_DCACHE_MASK) 1210 dcache_wb_poc(va, pa, 1); 1211 if ((va + size) & BUSDMA_DCACHE_MASK) 1212 dcache_wb_poc(va + size, pa + size, 1); 1213 1214 dcache_inv_poc_dma(va, pa, size); 1215 } 1216 1217 static void 1218 dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op) 1219 { 1220 uint32_t len, offset; 1221 vm_page_t m; 1222 vm_paddr_t pa; 1223 vm_offset_t va, tempva; 1224 bus_size_t size; 1225 1226 offset = sl->paddr & PAGE_MASK; 1227 m = sl->pages; 1228 size = sl->datacount; 1229 pa = sl->paddr; 1230 1231 for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) { 1232 tempva = 0; 1233 if (sl->vaddr == 0) { 1234 len = min(PAGE_SIZE - offset, size); 1235 tempva = pmap_quick_enter_page(m); 1236 va = tempva | offset; 1237 KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset), 1238 ("unexpected vm_page_t phys: 0x%08x != 0x%08x", 1239 VM_PAGE_TO_PHYS(m) | offset, pa)); 1240 } else { 1241 len = sl->datacount; 1242 va = sl->vaddr; 1243 } 1244 1245 switch (op) { 1246 case BUS_DMASYNC_PREWRITE: 1247 case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD: 1248 dcache_wb_poc(va, pa, len); 1249 break; 1250 case BUS_DMASYNC_PREREAD: 1251 /* 1252 * An mbuf may start in the middle of a cacheline. There 1253 * will be no cpu writes to the beginning of that line 1254 * (which contains the mbuf header) while dma is in 1255 * progress. Handle that case by doing a writeback of 1256 * just the first cacheline before invalidating the 1257 * overall buffer. Any mbuf in a chain may have this 1258 * misalignment. Buffers which are not mbufs bounce if 1259 * they are not aligned to a cacheline. 1260 */ 1261 dma_preread_safe(va, pa, len); 1262 break; 1263 case BUS_DMASYNC_POSTREAD: 1264 case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: 1265 dcache_inv_poc(va, pa, len); 1266 break; 1267 default: 1268 panic("unsupported combination of sync operations: " 1269 "0x%08x\n", op); 1270 } 1271 1272 if (tempva != 0) 1273 pmap_quick_remove_page(tempva); 1274 } 1275 } 1276 1277 void 1278 bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) 1279 { 1280 struct bounce_page *bpage; 1281 struct sync_list *sl, *end; 1282 vm_offset_t datavaddr, tempvaddr; 1283 1284 if (op == BUS_DMASYNC_POSTWRITE) 1285 return; 1286 1287 /* 1288 * If the buffer was from user space, it is possible that this is not 1289 * the same vm map, especially on a POST operation. It's not clear that 1290 * dma on userland buffers can work at all right now. To be safe, until 1291 * we're able to test direct userland dma, panic on a map mismatch. 1292 */ 1293 if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { 1294 CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " 1295 "performing bounce", __func__, dmat, dmat->flags, op); 1296 1297 /* 1298 * For PREWRITE do a writeback. Clean the caches from the 1299 * innermost to the outermost levels. 1300 */ 1301 if (op & BUS_DMASYNC_PREWRITE) { 1302 while (bpage != NULL) { 1303 tempvaddr = 0; 1304 datavaddr = bpage->datavaddr; 1305 if (datavaddr == 0) { 1306 tempvaddr = pmap_quick_enter_page( 1307 bpage->datapage); 1308 datavaddr = tempvaddr | bpage->dataoffs; 1309 } 1310 bcopy((void *)datavaddr, (void *)bpage->vaddr, 1311 bpage->datacount); 1312 if (tempvaddr != 0) 1313 pmap_quick_remove_page(tempvaddr); 1314 if ((dmat->flags & BUS_DMA_COHERENT) == 0) 1315 dcache_wb_poc(bpage->vaddr, 1316 bpage->busaddr, bpage->datacount); 1317 bpage = STAILQ_NEXT(bpage, links); 1318 } 1319 dmat->bounce_zone->total_bounced++; 1320 } 1321 1322 /* 1323 * Do an invalidate for PREREAD unless a writeback was already 1324 * done above due to PREWRITE also being set. The reason for a 1325 * PREREAD invalidate is to prevent dirty lines currently in the 1326 * cache from being evicted during the DMA. If a writeback was 1327 * done due to PREWRITE also being set there will be no dirty 1328 * lines and the POSTREAD invalidate handles the rest. The 1329 * invalidate is done from the innermost to outermost level. If 1330 * L2 were done first, a dirty cacheline could be automatically 1331 * evicted from L1 before we invalidated it, re-dirtying the L2. 1332 */ 1333 if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) { 1334 bpage = STAILQ_FIRST(&map->bpages); 1335 while (bpage != NULL) { 1336 if ((dmat->flags & BUS_DMA_COHERENT) == 0) 1337 dcache_inv_poc_dma(bpage->vaddr, 1338 bpage->busaddr, bpage->datacount); 1339 bpage = STAILQ_NEXT(bpage, links); 1340 } 1341 } 1342 1343 /* 1344 * Re-invalidate the caches on a POSTREAD, even though they were 1345 * already invalidated at PREREAD time. Aggressive prefetching 1346 * due to accesses to other data near the dma buffer could have 1347 * brought buffer data into the caches which is now stale. The 1348 * caches are invalidated from the outermost to innermost; the 1349 * prefetches could be happening right now, and if L1 were 1350 * invalidated first, stale L2 data could be prefetched into L1. 1351 */ 1352 if (op & BUS_DMASYNC_POSTREAD) { 1353 while (bpage != NULL) { 1354 if ((dmat->flags & BUS_DMA_COHERENT) == 0) 1355 dcache_inv_poc(bpage->vaddr, 1356 bpage->busaddr, bpage->datacount); 1357 tempvaddr = 0; 1358 datavaddr = bpage->datavaddr; 1359 if (datavaddr == 0) { 1360 tempvaddr = pmap_quick_enter_page( 1361 bpage->datapage); 1362 datavaddr = tempvaddr | bpage->dataoffs; 1363 } 1364 bcopy((void *)bpage->vaddr, (void *)datavaddr, 1365 bpage->datacount); 1366 if (tempvaddr != 0) 1367 pmap_quick_remove_page(tempvaddr); 1368 bpage = STAILQ_NEXT(bpage, links); 1369 } 1370 dmat->bounce_zone->total_bounced++; 1371 } 1372 } 1373 1374 /* 1375 * For COHERENT memory no cache maintenance is necessary, but ensure all 1376 * writes have reached memory for the PREWRITE case. No action is 1377 * needed for a PREREAD without PREWRITE also set, because that would 1378 * imply that the cpu had written to the COHERENT buffer and expected 1379 * the dma device to see that change, and by definition a PREWRITE sync 1380 * is required to make that happen. 1381 */ 1382 if (map->flags & DMAMAP_COHERENT) { 1383 if (op & BUS_DMASYNC_PREWRITE) { 1384 dsb(); 1385 if ((dmat->flags & BUS_DMA_COHERENT) == 0) 1386 cpu_l2cache_drain_writebuf(); 1387 } 1388 return; 1389 } 1390 1391 /* 1392 * Cache maintenance for normal (non-COHERENT non-bounce) buffers. All 1393 * the comments about the sequences for flushing cache levels in the 1394 * bounce buffer code above apply here as well. In particular, the fact 1395 * that the sequence is inner-to-outer for PREREAD invalidation and 1396 * outer-to-inner for POSTREAD invalidation is not a mistake. 1397 */ 1398 if (map->sync_count != 0) { 1399 sl = &map->slist[0]; 1400 end = &map->slist[map->sync_count]; 1401 CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " 1402 "performing sync", __func__, dmat, dmat->flags, op); 1403 1404 for ( ; sl != end; ++sl) 1405 dma_dcache_sync(sl, op); 1406 } 1407 } 1408