/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2012-2015 Ian Lepore
 * Copyright (c) 2010 Mark Tinguely
 * Copyright (c) 2004 Olivier Houchard
 * Copyright (c) 2002 Peter Grehan
 * Copyright (c) 1997, 1998 Justin T. Gibbs.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification, immediately at the beginning of the file.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * From i386/busdma_machdep.c 191438 2009-04-23 20:24:19Z jhb
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/busdma_bufalloc.h>
#include <sys/counter.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/md_var.h>

//#define ARM_BUSDMA_MAPLOAD_STATS

#define	BUSDMA_DCACHE_ALIGN	cpuinfo.dcache_line_size
#define	BUSDMA_DCACHE_MASK	cpuinfo.dcache_line_mask

#define	MAX_BPAGES		64
#define	MAX_DMA_SEGMENTS	4096
#define	BUS_DMA_EXCL_BOUNCE	BUS_DMA_BUS2
#define	BUS_DMA_ALIGN_BOUNCE	BUS_DMA_BUS3
#define	BUS_DMA_COULD_BOUNCE	(BUS_DMA_EXCL_BOUNCE | BUS_DMA_ALIGN_BOUNCE)
#define	BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4

struct bounce_page;
struct bounce_zone;

struct bus_dma_tag {
	bus_size_t	alignment;
	bus_addr_t	boundary;
	bus_addr_t	lowaddr;
	bus_addr_t	highaddr;
	bus_size_t	maxsize;
	u_int		nsegments;
	bus_size_t	maxsegsz;
	int		flags;
	int		map_count;
	bus_dma_lock_t	*lockfunc;
	void		*lockfuncarg;
	struct bounce_zone *bounce_zone;
};

struct sync_list {
	vm_offset_t	vaddr;		/* kva of client data */
	bus_addr_t	paddr;		/* physical address */
	vm_page_t	pages;		/* starting page of client data */
	bus_size_t	datacount;	/* client data count */
};

static uint32_t tags_total;
static uint32_t maps_total;
static uint32_t maps_dmamem;
static uint32_t maps_coherent;
#ifdef ARM_BUSDMA_MAPLOAD_STATS
static counter_u64_t maploads_total;
static counter_u64_t maploads_bounced;
static counter_u64_t maploads_coherent;
static counter_u64_t maploads_dmamem;
static counter_u64_t maploads_mbuf;
static counter_u64_t maploads_physmem;
#endif

SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Busdma parameters");
SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0,
    "Number of active tags");
SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0,
    "Number of active maps");
SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0,
    "Number of active maps for bus_dmamem_alloc buffers");
SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0,
    "Number of active maps with BUS_DMA_COHERENT flag set");
#ifdef ARM_BUSDMA_MAPLOAD_STATS
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD,
    &maploads_total, "Number of load operations performed");
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD,
    &maploads_bounced, "Number of load operations that used bounce buffers");
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD,
    &maploads_coherent, "Number of load operations on BUS_DMA_COHERENT memory");
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD,
    &maploads_dmamem, "Number of load operations on bus_dmamem_alloc buffers");
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD,
    &maploads_mbuf, "Number of load operations for mbufs");
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD,
    &maploads_physmem, "Number of load operations on physical buffers");
#endif

struct bus_dmamap {
	STAILQ_HEAD(, bounce_page) bpages;
	int		pagesneeded;
	int		pagesreserved;
	bus_dma_tag_t	dmat;
	struct memdesc	mem;
	bus_dmamap_callback_t *callback;
	void		*callback_arg;
	__sbintime_t	queued_time;
	int		flags;
#define	DMAMAP_COHERENT		(1 << 0)
#define	DMAMAP_DMAMEM_ALLOC	(1 << 1)
#define	DMAMAP_MBUF		(1 << 2)
	STAILQ_ENTRY(bus_dmamap) links;
	bus_dma_segment_t *segments;
	int		sync_count;
	struct sync_list slist[];
};

static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap,
    bus_dmamap_t map, void *buf, bus_size_t buflen, int flags);
static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
    vm_paddr_t buf, bus_size_t buflen, int flags);
static void dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size);
static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op);

static busdma_bufalloc_t coherent_allocator;	/* Cache of coherent buffers */
static busdma_bufalloc_t standard_allocator;	/* Cache of standard buffers */

MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata");

#define	dmat_alignment(dmat)	((dmat)->alignment)
#define	dmat_flags(dmat)	((dmat)->flags)
#define	dmat_highaddr(dmat)	((dmat)->highaddr)
#define	dmat_lowaddr(dmat)	((dmat)->lowaddr)
#define	dmat_lockfunc(dmat)	((dmat)->lockfunc)
#define	dmat_lockfuncarg(dmat)	((dmat)->lockfuncarg)

#include "../../kern/subr_busdma_bounce.c"

static void
busdma_init(void *dummy)
{
	int uma_flags;

#ifdef ARM_BUSDMA_MAPLOAD_STATS
	maploads_total = counter_u64_alloc(M_WAITOK);
	maploads_bounced = counter_u64_alloc(M_WAITOK);
	maploads_coherent = counter_u64_alloc(M_WAITOK);
	maploads_dmamem = counter_u64_alloc(M_WAITOK);
	maploads_mbuf = counter_u64_alloc(M_WAITOK);
	maploads_physmem = counter_u64_alloc(M_WAITOK);
#endif

	uma_flags = 0;

	/* Create a cache of buffers in standard (cacheable) memory. */
	standard_allocator = busdma_bufalloc_create("buffer",
	    BUSDMA_DCACHE_ALIGN,/* minimum_alignment */
	    NULL,		/* uma_alloc func */
	    NULL,		/* uma_free func */
	    uma_flags);		/* uma_zcreate_flags */

#ifdef INVARIANTS
	/*
	 * Force UMA zone to allocate service structures like
	 * slabs using own allocator. uma_debug code performs
	 * atomic ops on uma_slab_t fields and safety of this
	 * operation is not guaranteed for write-back caches
	 */
	uma_flags = UMA_ZONE_NOTOUCH;
#endif
	/*
	 * Create a cache of buffers in uncacheable memory, to implement the
	 * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag.
	 */
	coherent_allocator = busdma_bufalloc_create("coherent",
	    BUSDMA_DCACHE_ALIGN,/* minimum_alignment */
	    busdma_bufalloc_alloc_uncacheable,
	    busdma_bufalloc_free_uncacheable,
	    uma_flags);		/* uma_zcreate_flags */
}

/*
 * This init historically used SI_SUB_VM, but now the init code requires
 * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get
 * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by
 * using SI_SUB_KMEM+1.
 */
SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL);

/*
 * This routine checks the exclusion zone constraints from a tag against the
 * physical RAM available on the machine.  If a tag specifies an exclusion zone
 * but there's no RAM in that zone, then we avoid allocating resources to bounce
 * a request, and we can use any memory allocator (as opposed to needing
 * kmem_alloc_contig() just because it can allocate pages in an address range).
 *
 * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the
 * same value on 32-bit architectures) as their lowaddr constraint, and we can't
 * possibly have RAM at an address higher than the highest address we can
 * express, so we take a fast out.
 */
static int
exclusion_bounce_check(vm_offset_t lowaddr, vm_offset_t highaddr)
{
	int i;

	if (lowaddr >= BUS_SPACE_MAXADDR)
		return (0);

	for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) {
		if ((lowaddr >= phys_avail[i] && lowaddr < phys_avail[i + 1]) ||
		    (lowaddr < phys_avail[i] && highaddr >= phys_avail[i]))
			return (1);
	}
	return (0);
}

/*
 * Return true if the tag has an exclusion zone that could lead to bouncing.
 */
static __inline int
exclusion_bounce(bus_dma_tag_t dmat)
{

	return (dmat->flags & BUS_DMA_EXCL_BOUNCE);
}

/*
 * Return true if the given address does not fall on the alignment boundary.
 */
static __inline int
alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr)
{

	return (!vm_addr_align_ok(addr, dmat->alignment));
}
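
/*
 * Illustrative worked example (not from the original sources): assume a
 * 64-byte dcache line, so BUSDMA_DCACHE_MASK is 0x3f.  A 0x200-byte buffer
 * starting at physical address 0x80000020 neither starts nor ends on a line
 * boundary, and (0x80000020 | 0x200) & 0x3f == 0x20 is non-zero, so
 * cacheline_bounce() below reports true for it unless the map is marked
 * COHERENT, MBUF, or DMAMEM_ALLOC.
 */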

/*
 * Return true if the DMA should bounce because the start or end does not fall
 * on a cacheline boundary (which would require a partial cacheline flush).
 * COHERENT memory doesn't trigger cacheline flushes.  Memory allocated by
 * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a
 * strict rule that such memory cannot be accessed by the CPU while DMA is in
 * progress (or by multiple DMA engines at once), so that it's always safe to do
 * full cacheline flushes even if that affects memory outside the range of a
 * given DMA operation that doesn't involve the full allocated buffer.  If we're
 * mapping an mbuf, that follows the same rules as a buffer we allocated.
 */
static __inline int
cacheline_bounce(bus_dmamap_t map, bus_addr_t addr, bus_size_t size)
{

	if (map->flags & (DMAMAP_DMAMEM_ALLOC | DMAMAP_COHERENT | DMAMAP_MBUF))
		return (0);
	return ((addr | size) & BUSDMA_DCACHE_MASK);
}

/*
 * Return true if we might need to bounce the DMA described by addr and size.
 *
 * This is used to quick-check whether we need to do the more expensive work of
 * checking the DMA page-by-page looking for alignment and exclusion bounces.
 *
 * Note that the addr argument might be either virtual or physical.  It doesn't
 * matter because we only look at the low-order bits, which are the same in both
 * address spaces, and the maximum alignment of a generic buffer is limited to
 * the page size.
 * Bouncing of buffers allocated by bus_dmamem_alloc() is not necessary; those
 * always comply with the required rules (alignment, boundary, and address
 * range).
 */
static __inline int
might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr,
    bus_size_t size)
{

	KASSERT(map->flags & DMAMAP_DMAMEM_ALLOC ||
	    dmat->alignment <= PAGE_SIZE,
	    ("%s: unsupported alignment (0x%08lx) for buffer not "
	    "allocated by bus_dmamem_alloc()",
	    __func__, dmat->alignment));

	return (!(map->flags & DMAMAP_DMAMEM_ALLOC) &&
	    ((dmat->flags & BUS_DMA_EXCL_BOUNCE) ||
	    alignment_bounce(dmat, addr) ||
	    cacheline_bounce(map, addr, size)));
}

/*
 * Return true if we must bounce the DMA described by paddr and size.
 *
 * Bouncing can be triggered by DMA that doesn't begin and end on cacheline
 * boundaries, or doesn't begin on an alignment boundary, or falls within the
 * exclusion zone of the tag.
 */
static int
must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
    bus_size_t size)
{

	if (cacheline_bounce(map, paddr, size))
		return (1);

	/*
	 * Check the tag's exclusion zone.
	 */
	if (exclusion_bounce(dmat) && addr_needs_bounce(dmat, paddr))
		return (1);

	return (0);
}

/*
 * Allocate a device specific dma_tag.
 */
int
bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	bus_dma_tag_t newtag;
	int error = 0;

	/* Basic sanity checking. */
	KASSERT(boundary == 0 || powerof2(boundary),
	    ("dma tag boundary %lu, must be a power of 2", boundary));
	KASSERT(boundary == 0 || boundary >= maxsegsz,
	    ("dma tag boundary %lu is < maxsegsz %lu\n", boundary, maxsegsz));
	KASSERT(alignment != 0 && powerof2(alignment),
	    ("dma tag alignment %lu, must be non-zero power of 2", alignment));
	KASSERT(maxsegsz != 0, ("dma tag maxsegsz must not be zero"));

	/* Return a NULL tag on failure */
	*dmat = NULL;

	/* Filters are no longer supported. */
	if (filter != NULL || filterarg != NULL)
		return (EINVAL);

	newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA,
	    M_ZERO | M_NOWAIT);
	if (newtag == NULL) {
		CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
		    __func__, newtag, 0, error);
		return (ENOMEM);
	}

	newtag->alignment = alignment;
	newtag->boundary = boundary;
	newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1);
	newtag->highaddr = trunc_page((vm_paddr_t)highaddr) +
	    (PAGE_SIZE - 1);
	newtag->maxsize = maxsize;
	newtag->nsegments = nsegments;
	newtag->maxsegsz = maxsegsz;
	newtag->flags = flags;
	newtag->map_count = 0;
	if (lockfunc != NULL) {
		newtag->lockfunc = lockfunc;
		newtag->lockfuncarg = lockfuncarg;
	} else {
		newtag->lockfunc = _busdma_dflt_lock;
		newtag->lockfuncarg = NULL;
	}

	/* Take into account any restrictions imposed by our parent tag */
	if (parent != NULL) {
		newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr);
		newtag->highaddr = MAX(parent->highaddr, newtag->highaddr);
		newtag->alignment = MAX(parent->alignment, newtag->alignment);
		newtag->flags |= parent->flags & BUS_DMA_COULD_BOUNCE;
		newtag->flags |= parent->flags & BUS_DMA_COHERENT;
		if (newtag->boundary == 0)
			newtag->boundary = parent->boundary;
		else if (parent->boundary != 0)
			newtag->boundary = MIN(parent->boundary,
			    newtag->boundary);
	}

	if (exclusion_bounce_check(newtag->lowaddr, newtag->highaddr))
		newtag->flags |= BUS_DMA_EXCL_BOUNCE;
	if (alignment_bounce(newtag, 1))
		newtag->flags |= BUS_DMA_ALIGN_BOUNCE;

	/*
	 * Any request can auto-bounce due to cacheline alignment, in addition
	 * to any alignment or boundary specifications in the tag, so if the
	 * ALLOCNOW flag is set, there's always work to do.
	 */
	if ((flags & BUS_DMA_ALLOCNOW) != 0) {
		struct bounce_zone *bz;
		/*
		 * Round size up to a full page, and add one more page because
		 * there can always be one more boundary crossing than the
		 * number of pages in a transfer.
		 */
		maxsize = roundup2(maxsize, PAGE_SIZE) + PAGE_SIZE;

		if ((error = alloc_bounce_zone(newtag)) != 0) {
			free(newtag, M_BUSDMA);
			return (error);
		}
		bz = newtag->bounce_zone;

		if (ptoa(bz->total_bpages) < maxsize) {
			int pages;

			pages = atop(maxsize) - bz->total_bpages;

			/* Add pages to our bounce pool */
			if (alloc_bounce_pages(newtag, pages) < pages)
				error = ENOMEM;
		}
		/* Performed initial allocation */
		newtag->flags |= BUS_DMA_MIN_ALLOC_COMP;
	} else
		newtag->bounce_zone = NULL;

	if (error != 0) {
		free(newtag, M_BUSDMA);
	} else {
		atomic_add_32(&tags_total, 1);
		*dmat = newtag;
	}
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->flags : 0), error);
	return (error);
}
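
/*
 * Hypothetical usage sketch (illustrative only; the softc field names and
 * sizes below are made up and not part of this file).  A driver that DMAs
 * single mbuf clusters might create its tag roughly like this:
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev),
 *	    1, 0,			(alignment, boundary)
 *	    BUS_SPACE_MAXADDR_32BIT,	(lowaddr)
 *	    BUS_SPACE_MAXADDR,		(highaddr)
 *	    NULL, NULL,			(filter, filterarg: must be NULL)
 *	    MCLBYTES, 1, MCLBYTES,	(maxsize, nsegments, maxsegsz)
 *	    0, busdma_lock_mutex, &sc->sc_mtx, &sc->sc_dtag);
 */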

void
bus_dma_template_clone(bus_dma_template_t *t, bus_dma_tag_t dmat)
{

	if (t == NULL || dmat == NULL)
		return;

	t->alignment = dmat->alignment;
	t->boundary = dmat->boundary;
	t->lowaddr = dmat->lowaddr;
	t->highaddr = dmat->highaddr;
	t->maxsize = dmat->maxsize;
	t->nsegments = dmat->nsegments;
	t->maxsegsize = dmat->maxsegsz;
	t->flags = dmat->flags;
	t->lockfunc = dmat->lockfunc;
	t->lockfuncarg = dmat->lockfuncarg;
}

int
bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain)
{

	return (0);
}

int
bus_dma_tag_destroy(bus_dma_tag_t dmat)
{
	int error = 0;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		free(dmat, M_BUSDMA);
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat, error);
	return (error);
}

static int
allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t mapp)
{
	struct bounce_zone *bz;
	int maxpages;
	int error;

	if (dmat->bounce_zone == NULL)
		if ((error = alloc_bounce_zone(dmat)) != 0)
			return (error);
	bz = dmat->bounce_zone;
	/* Initialize the new map */
	STAILQ_INIT(&(mapp->bpages));

	/*
	 * Attempt to add pages to our pool on a per-instance basis up to a sane
	 * limit.  Even if the tag isn't flagged as COULD_BOUNCE due to
	 * alignment and boundary constraints, it could still auto-bounce due to
	 * cacheline alignment, which requires at most two bounce pages.
	 */
	if (dmat->flags & BUS_DMA_COULD_BOUNCE)
		maxpages = MAX_BPAGES;
	else
		maxpages = 2 * bz->map_count;
	if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 ||
	    (bz->map_count > 0 && bz->total_bpages < maxpages)) {
		int pages;

		pages = atop(roundup2(dmat->maxsize, PAGE_SIZE)) + 1;
		pages = MIN(maxpages - bz->total_bpages, pages);
		pages = MAX(pages, 2);
		if (alloc_bounce_pages(dmat, pages) < pages)
			return (ENOMEM);

		if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0)
			dmat->flags |= BUS_DMA_MIN_ALLOC_COMP;
	}
	bz->map_count++;
	return (0);
}

static bus_dmamap_t
allocate_map(bus_dma_tag_t dmat, int mflags)
{
	int mapsize, segsize;
	bus_dmamap_t map;

	/*
	 * Allocate the map.  The map structure ends with an embedded
	 * variable-sized array of sync_list structures.  Following that
	 * we allocate enough extra space to hold the array of bus_dma_segments.
	 */
	KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS,
	    ("cannot allocate %u dma segments (max is %u)",
	    dmat->nsegments, MAX_DMA_SEGMENTS));
	segsize = sizeof(struct bus_dma_segment) * dmat->nsegments;
	mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments;
	map = malloc(mapsize + segsize, M_BUSDMA, mflags | M_ZERO);
	if (map == NULL) {
		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
		return (NULL);
	}
	map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize);
	STAILQ_INIT(&map->bpages);
	return (map);
}

/*
 * Allocate a handle for mapping from kva/uva/physical
 * address space into bus device space.
 */
int
bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	bus_dmamap_t map;
	int error = 0;

	*mapp = map = allocate_map(dmat, M_NOWAIT);
	if (map == NULL) {
		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
		return (ENOMEM);
	}

	/*
	 * Bouncing might be required if the driver asks for an exclusion
	 * region, a data alignment that is stricter than 1, or DMA that begins
	 * or ends with a partial cacheline.  Whether bouncing will actually
	 * happen can't be known until mapping time, but we need to pre-allocate
	 * resources now because we might not be allowed to at mapping time.
	 */
	error = allocate_bz_and_pages(dmat, map);
	if (error != 0) {
		free(map, M_BUSDMA);
		*mapp = NULL;
		return (error);
	}
	if (map->flags & DMAMAP_COHERENT)
		atomic_add_32(&maps_coherent, 1);
	atomic_add_32(&maps_total, 1);
	dmat->map_count++;

	return (0);
}

/*
 * Destroy a handle for mapping from kva/uva/physical
 * address space into bus device space.
 */
int
bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
{

	if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
		    __func__, dmat, EBUSY);
		return (EBUSY);
	}
	if (dmat->bounce_zone)
		dmat->bounce_zone->map_count--;
	if (map->flags & DMAMAP_COHERENT)
		atomic_subtract_32(&maps_coherent, 1);
	atomic_subtract_32(&maps_total, 1);
	free(map, M_BUSDMA);
	dmat->map_count--;
	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
	return (0);
}

/*
 * Allocate a piece of memory that can be efficiently mapped into bus device
 * space based on the constraints listed in the dma tag.  Returns a pointer to
 * the allocated memory, and a pointer to an associated bus_dmamap.
 */
int
bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	busdma_bufalloc_t ba;
	struct busdma_bufzone *bufzone;
	bus_dmamap_t map;
	vm_memattr_t memattr;
	int mflags;

	if (flags & BUS_DMA_NOWAIT)
		mflags = M_NOWAIT;
	else
		mflags = M_WAITOK;
	if (flags & BUS_DMA_ZERO)
		mflags |= M_ZERO;

	*mapp = map = allocate_map(dmat, mflags);
	if (map == NULL) {
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
		    __func__, dmat, dmat->flags, ENOMEM);
		return (ENOMEM);
	}
	map->flags = DMAMAP_DMAMEM_ALLOC;

	/* For coherent memory, set the map flag that disables sync ops. */
	if (flags & BUS_DMA_COHERENT)
		map->flags |= DMAMAP_COHERENT;

	/*
	 * Choose a busdma buffer allocator based on memory type flags.
	 * If the tag's COHERENT flag is set, that means normal memory
	 * is already coherent, use the normal allocator.
	 */
	if ((flags & BUS_DMA_COHERENT) &&
	    ((dmat->flags & BUS_DMA_COHERENT) == 0)) {
		memattr = VM_MEMATTR_UNCACHEABLE;
		ba = coherent_allocator;
	} else {
		memattr = VM_MEMATTR_DEFAULT;
		ba = standard_allocator;
	}

	/*
	 * Try to find a bufzone in the allocator that holds a cache of buffers
	 * of the right size for this request.  If the buffer is too big to be
	 * held in the allocator cache, this returns NULL.
	 */
	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);

	/*
	 * Allocate the buffer from the uma(9) allocator if...
	 *  - It's small enough to be in the allocator (bufzone not NULL).
	 *  - The alignment constraint isn't larger than the allocation size
	 *    (the allocator aligns buffers to their size boundaries).
	 *  - There's no need to handle lowaddr/highaddr exclusion zones.
	 * else allocate non-contiguous pages if...
	 *  - The page count that could get allocated doesn't exceed
	 *    nsegments, even when the maximum segment size is less
	 *    than PAGE_SIZE.
	 *  - The alignment constraint isn't larger than a page boundary.
	 *  - There are no boundary-crossing constraints.
	 * else allocate a block of contiguous pages because one or more of the
	 * constraints is something that only the contig allocator can fulfill.
	 */
	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
	    !exclusion_bounce(dmat)) {
		*vaddr = uma_zalloc(bufzone->umazone, mflags);
	} else if (dmat->nsegments >=
	    howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) &&
	    dmat->alignment <= PAGE_SIZE &&
	    (dmat->boundary % PAGE_SIZE) == 0) {
		*vaddr = kmem_alloc_attr(dmat->maxsize, mflags, 0,
		    dmat->lowaddr, memattr);
	} else {
		*vaddr = kmem_alloc_contig(dmat->maxsize, mflags, 0,
		    dmat->lowaddr, dmat->alignment, dmat->boundary, memattr);
	}
	if (*vaddr == NULL) {
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
		    __func__, dmat, dmat->flags, ENOMEM);
		free(map, M_BUSDMA);
		*mapp = NULL;
		return (ENOMEM);
	}
	if (map->flags & DMAMAP_COHERENT)
		atomic_add_32(&maps_coherent, 1);
	atomic_add_32(&maps_dmamem, 1);
	atomic_add_32(&maps_total, 1);
	dmat->map_count++;

	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
	    __func__, dmat, dmat->flags, 0);
	return (0);
}
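
/*
 * Hypothetical lifecycle sketch (illustrative only; "tag", "map", "vaddr",
 * "size", and the callback are placeholders, not part of this file).  The
 * calls in this file are typically paired by a driver roughly as follows:
 *
 *	bus_dmamem_alloc(tag, &vaddr, BUS_DMA_COHERENT | BUS_DMA_ZERO, &map);
 *	bus_dmamap_load(tag, map, vaddr, size, callback, arg, BUS_DMA_NOWAIT);
 *	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 *	... device performs DMA ...
 *	bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 *	bus_dmamap_unload(tag, map);
 *	bus_dmamem_free(tag, vaddr, map);
 */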

/*
 * Free a piece of memory that was allocated via bus_dmamem_alloc, along with
 * its associated map.
 */
void
bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
{
	struct busdma_bufzone *bufzone;
	busdma_bufalloc_t ba;

	if ((map->flags & DMAMAP_COHERENT) &&
	    ((dmat->flags & BUS_DMA_COHERENT) == 0))
		ba = coherent_allocator;
	else
		ba = standard_allocator;

	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);

	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
	    !exclusion_bounce(dmat))
		uma_zfree(bufzone->umazone, vaddr);
	else
		kmem_free(vaddr, dmat->maxsize);

	dmat->map_count--;
	if (map->flags & DMAMAP_COHERENT)
		atomic_subtract_32(&maps_coherent, 1);
	atomic_subtract_32(&maps_total, 1);
	atomic_subtract_32(&maps_dmamem, 1);
	free(map, M_BUSDMA);
	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
}

static void
_bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
    bus_size_t buflen, int flags)
{
	bus_addr_t curaddr;
	bus_size_t sgsize;

	if (map->pagesneeded == 0) {
		CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d"
		    " map= %p, pagesneeded= %d",
		    dmat->lowaddr, dmat->boundary, dmat->alignment,
		    map, map->pagesneeded);
		/*
		 * Count the number of bounce pages
		 * needed in order to complete this transfer
		 */
		curaddr = buf;
		while (buflen != 0) {
			sgsize = MIN(buflen, dmat->maxsegsz);
			if (must_bounce(dmat, map, curaddr, sgsize) != 0) {
				sgsize = MIN(sgsize,
				    PAGE_SIZE - (curaddr & PAGE_MASK));
				map->pagesneeded++;
			}
			curaddr += sgsize;
			buflen -= sgsize;
		}
		CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded);
	}
}

static void
_bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map,
    void *buf, bus_size_t buflen, int flags)
{
	vm_offset_t vaddr;
	vm_offset_t vendaddr;
	bus_addr_t paddr;

	if (map->pagesneeded == 0) {
		CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d"
		    " map= %p, pagesneeded= %d",
		    dmat->lowaddr, dmat->boundary, dmat->alignment,
		    map, map->pagesneeded);
		/*
		 * Count the number of bounce pages
		 * needed in order to complete this transfer
		 */
		vaddr = (vm_offset_t)buf;
		vendaddr = (vm_offset_t)buf + buflen;

		while (vaddr < vendaddr) {
			if (__predict_true(pmap == kernel_pmap))
				paddr = pmap_kextract(vaddr);
			else
				paddr = pmap_extract(pmap, vaddr);
			if (must_bounce(dmat, map, paddr,
			    min(vendaddr - vaddr, (PAGE_SIZE - ((vm_offset_t)vaddr &
			    PAGE_MASK)))) != 0) {
				map->pagesneeded++;
			}
			vaddr += (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK));
		}
		CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded);
	}
}
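
/*
 * Worked example for the boundary clipping in _bus_dmamap_addseg() below
 * (illustrative numbers only): with dmat->boundary == 0x1000, curaddr ==
 * 0x80000f80, and sgsize == 0x200, the range would cross the 0x80001000
 * boundary, so vm_addr_bound_ok() fails and sgsize is clipped to
 * roundup2(0x80000f80, 0x1000) - 0x80000f80 == 0x80.  The remaining 0x180
 * bytes are picked up by the caller's loop on a later iteration.
 */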

/*
 * Add a single contiguous physical range to the segment list.
 */
static int
_bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
    bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
{
	int seg;

	/*
	 * Make sure we don't cross any boundaries.
	 */
	if (!vm_addr_bound_ok(curaddr, sgsize, dmat->boundary))
		sgsize = roundup2(curaddr, dmat->boundary) - curaddr;

	/*
	 * Insert chunk into a segment, coalescing with
	 * previous segment if possible.
	 */
	seg = *segp;
	if (seg == -1) {
		seg = 0;
		segs[seg].ds_addr = curaddr;
		segs[seg].ds_len = sgsize;
	} else {
		if (curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
		    (segs[seg].ds_len + sgsize) <= dmat->maxsegsz &&
		    vm_addr_bound_ok(segs[seg].ds_addr,
		    segs[seg].ds_len + sgsize, dmat->boundary))
			segs[seg].ds_len += sgsize;
		else {
			if (++seg >= dmat->nsegments)
				return (0);
			segs[seg].ds_addr = curaddr;
			segs[seg].ds_len = sgsize;
		}
	}
	*segp = seg;
	return (sgsize);
}

/*
 * Utility function to load a physical buffer.  segp contains
 * the starting segment on entrance, and the ending segment on exit.
 */
int
_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
    bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp)
{
	bus_addr_t curaddr;
	bus_addr_t sl_end = 0;
	bus_size_t sgsize;
	struct sync_list *sl;
	int error;

	if (segs == NULL)
		segs = map->segments;

#ifdef ARM_BUSDMA_MAPLOAD_STATS
	counter_u64_add(maploads_total, 1);
	counter_u64_add(maploads_physmem, 1);
#endif

	if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
		if (map->pagesneeded != 0) {
#ifdef ARM_BUSDMA_MAPLOAD_STATS
			counter_u64_add(maploads_bounced, 1);
#endif
			error = _bus_dmamap_reserve_pages(dmat, map, flags);
			if (error)
				return (error);
		}
	}

	sl = map->slist + map->sync_count - 1;

	while (buflen > 0) {
		curaddr = buf;
		sgsize = MIN(buflen, dmat->maxsegsz);
		if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr,
		    sgsize)) {
			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
			curaddr = add_bounce_page(dmat, map, 0, curaddr,
			    sgsize);
		} else if ((dmat->flags & BUS_DMA_COHERENT) == 0) {
			if (map->sync_count > 0)
				sl_end = sl->paddr + sl->datacount;

			if (map->sync_count == 0 || curaddr != sl_end) {
				if (++map->sync_count > dmat->nsegments)
					break;
				sl++;
				sl->vaddr = 0;
				sl->paddr = curaddr;
				sl->datacount = sgsize;
				sl->pages = PHYS_TO_VM_PAGE(curaddr);
				KASSERT(sl->pages != NULL,
				    ("%s: page at PA:0x%08lx is not in "
				    "vm_page_array", __func__, curaddr));
			} else
				sl->datacount += sgsize;
		}
		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
		    segp);
		if (sgsize == 0)
			break;
		buf += sgsize;
		buflen -= sgsize;
	}

	/*
	 * Did we fit?
	 */
	if (buflen != 0) {
		bus_dmamap_unload(dmat, map);
		return (EFBIG); /* XXX better return value here? */
	}
	return (0);
}

int
_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{

	return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags,
	    segs, segp));
}

/*
 * Utility function to load a linear buffer.  segp contains
 * the starting segment on entrance, and the ending segment on exit.
 */
int
_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	bus_size_t sgsize;
	bus_addr_t curaddr;
	bus_addr_t sl_pend = 0;
	vm_offset_t kvaddr, vaddr, sl_vend = 0;
	struct sync_list *sl;
	int error;

#ifdef ARM_BUSDMA_MAPLOAD_STATS
	counter_u64_add(maploads_total, 1);
	if (map->flags & DMAMAP_COHERENT)
		counter_u64_add(maploads_coherent, 1);
	if (map->flags & DMAMAP_DMAMEM_ALLOC)
		counter_u64_add(maploads_dmamem, 1);
#endif

	if (segs == NULL)
		segs = map->segments;

	if (flags & BUS_DMA_LOAD_MBUF) {
#ifdef ARM_BUSDMA_MAPLOAD_STATS
		counter_u64_add(maploads_mbuf, 1);
#endif
		map->flags |= DMAMAP_MBUF;
	}

	if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
		_bus_dmamap_count_pages(dmat, pmap, map, buf, buflen, flags);
		if (map->pagesneeded != 0) {
#ifdef ARM_BUSDMA_MAPLOAD_STATS
			counter_u64_add(maploads_bounced, 1);
#endif
			error = _bus_dmamap_reserve_pages(dmat, map, flags);
			if (error)
				return (error);
		}
	}

	sl = map->slist + map->sync_count - 1;
	vaddr = (vm_offset_t)buf;

	while (buflen > 0) {
		/*
		 * Get the physical address for this segment.
		 */
		if (__predict_true(pmap == kernel_pmap)) {
			curaddr = pmap_kextract(vaddr);
			kvaddr = vaddr;
		} else {
			curaddr = pmap_extract(pmap, vaddr);
			kvaddr = 0;
		}

		/*
		 * Compute the segment size, and adjust counts.
		 */
		sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
		if (sgsize > dmat->maxsegsz)
			sgsize = dmat->maxsegsz;
		if (buflen < sgsize)
			sgsize = buflen;

		if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr,
		    sgsize)) {
			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
			    sgsize);
		} else if ((dmat->flags & BUS_DMA_COHERENT) == 0) {
			if (map->sync_count > 0) {
				sl_pend = sl->paddr + sl->datacount;
				sl_vend = sl->vaddr + sl->datacount;
			}

			if (map->sync_count == 0 ||
			    (kvaddr != 0 && kvaddr != sl_vend) ||
			    (curaddr != sl_pend)) {
				if (++map->sync_count > dmat->nsegments)
					goto cleanup;
				sl++;
				sl->vaddr = kvaddr;
				sl->paddr = curaddr;
				if (kvaddr != 0) {
					sl->pages = NULL;
				} else {
					sl->pages = PHYS_TO_VM_PAGE(curaddr);
					KASSERT(sl->pages != NULL,
					    ("%s: page at PA:0x%08lx is not "
					    "in vm_page_array", __func__,
					    curaddr));
				}
				sl->datacount = sgsize;
			} else
				sl->datacount += sgsize;
		}
		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
		    segp);
		if (sgsize == 0)
			break;
		vaddr += sgsize;
		buflen -= sgsize;
	}

cleanup:
	/*
	 * Did we fit?
	 */
	if (buflen != 0) {
		bus_dmamap_unload(dmat, map);
		return (EFBIG); /* XXX better return value here? */
	}
	return (0);
}

void
_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem,
    bus_dmamap_callback_t *callback, void *callback_arg)
{

	map->mem = *mem;
	map->dmat = dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

bus_dma_segment_t *
_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dma_segment_t *segs, int nsegs, int error)
{

	if (segs == NULL)
		segs = map->segments;
	return (segs);
}

/*
 * Release the mapping held by map.
 */
void
bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
{
	struct bounce_zone *bz;

	if ((bz = dmat->bounce_zone) != NULL) {
		free_bounce_pages(dmat, map);

		if (map->pagesreserved != 0) {
			mtx_lock(&bounce_lock);
			bz->free_bpages += map->pagesreserved;
			bz->reserved_bpages -= map->pagesreserved;
			mtx_unlock(&bounce_lock);
			map->pagesreserved = 0;
		}
		map->pagesneeded = 0;
	}
	map->sync_count = 0;
	map->flags &= ~DMAMAP_MBUF;
}

static void
dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
{
	/*
	 * Write back any partial cachelines immediately before and
	 * after the DMA region.  We don't need to round the address
	 * down to the nearest cacheline or specify the exact size,
	 * as dcache_wb_poc() will do the rounding for us and works
	 * at cacheline granularity.
	 */
	if (va & BUSDMA_DCACHE_MASK)
		dcache_wb_poc(va, pa, 1);
	if ((va + size) & BUSDMA_DCACHE_MASK)
		dcache_wb_poc(va + size, pa + size, 1);

	dcache_inv_poc_dma(va, pa, size);
}
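
/*
 * Worked example for dma_preread_safe() above (illustrative numbers only):
 * with 64-byte cache lines, va == 0xc00c0010 and size == 0x70, the start is
 * not line-aligned (offset 0x10), so the line containing it is written back;
 * the end (0xc00c0080) is line-aligned, so no trailing writeback is needed;
 * the whole range is then invalidated with dcache_inv_poc_dma().
 */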

static void
dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op)
{
	uint32_t len, offset;
	vm_page_t m;
	vm_paddr_t pa;
	vm_offset_t va, tempva;
	bus_size_t size;

	offset = sl->paddr & PAGE_MASK;
	m = sl->pages;
	size = sl->datacount;
	pa = sl->paddr;

	for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) {
		tempva = 0;
		if (sl->vaddr == 0) {
			len = min(PAGE_SIZE - offset, size);
			tempva = pmap_quick_enter_page(m);
			va = tempva | offset;
			KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset),
			    ("unexpected vm_page_t phys: 0x%08x != 0x%08x",
			    VM_PAGE_TO_PHYS(m) | offset, pa));
		} else {
			len = sl->datacount;
			va = sl->vaddr;
		}

		switch (op) {
		case BUS_DMASYNC_PREWRITE:
		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
			dcache_wb_poc(va, pa, len);
			break;
		case BUS_DMASYNC_PREREAD:
			/*
			 * An mbuf may start in the middle of a cacheline. There
			 * will be no cpu writes to the beginning of that line
			 * (which contains the mbuf header) while dma is in
			 * progress.  Handle that case by doing a writeback of
			 * just the first cacheline before invalidating the
			 * overall buffer.  Any mbuf in a chain may have this
			 * misalignment.  Buffers which are not mbufs bounce if
			 * they are not aligned to a cacheline.
			 */
			dma_preread_safe(va, pa, len);
			break;
		case BUS_DMASYNC_POSTREAD:
		case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
			dcache_inv_poc(va, pa, len);
			break;
		default:
			panic("unsupported combination of sync operations: "
			    "0x%08x\n", op);
		}

		if (tempva != 0)
			pmap_quick_remove_page(tempva);
	}
}

void
bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
{
	struct bounce_page *bpage;
	struct sync_list *sl, *end;
	vm_offset_t datavaddr, tempvaddr;

	if (op == BUS_DMASYNC_POSTWRITE)
		return;

	/*
	 * If the buffer was from user space, it is possible that this is not
	 * the same vm map, especially on a POST operation.  It's not clear that
	 * dma on userland buffers can work at all right now.  To be safe, until
	 * we're able to test direct userland dma, panic on a map mismatch.
	 */
	if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
		    "performing bounce", __func__, dmat, dmat->flags, op);

		/*
		 * For PREWRITE do a writeback.  Clean the caches from the
		 * innermost to the outermost levels.
		 */
		if (op & BUS_DMASYNC_PREWRITE) {
			while (bpage != NULL) {
				tempvaddr = 0;
				datavaddr = bpage->datavaddr;
				if (datavaddr == 0) {
					tempvaddr = pmap_quick_enter_page(
					    bpage->datapage);
					datavaddr = tempvaddr | bpage->dataoffs;
				}
				bcopy((void *)datavaddr, (void *)bpage->vaddr,
				    bpage->datacount);
				if (tempvaddr != 0)
					pmap_quick_remove_page(tempvaddr);
				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
					dcache_wb_poc(bpage->vaddr,
					    bpage->busaddr, bpage->datacount);
				bpage = STAILQ_NEXT(bpage, links);
			}
			dmat->bounce_zone->total_bounced++;
		}

		/*
		 * Do an invalidate for PREREAD unless a writeback was already
		 * done above due to PREWRITE also being set.  The reason for a
		 * PREREAD invalidate is to prevent dirty lines currently in the
		 * cache from being evicted during the DMA.  If a writeback was
		 * done due to PREWRITE also being set there will be no dirty
		 * lines and the POSTREAD invalidate handles the rest.  The
		 * invalidate is done from the innermost to outermost level.  If
		 * L2 were done first, a dirty cacheline could be automatically
		 * evicted from L1 before we invalidated it, re-dirtying the L2.
		 */
		if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) {
			bpage = STAILQ_FIRST(&map->bpages);
			while (bpage != NULL) {
				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
					dcache_inv_poc_dma(bpage->vaddr,
					    bpage->busaddr, bpage->datacount);
				bpage = STAILQ_NEXT(bpage, links);
			}
		}

		/*
		 * Re-invalidate the caches on a POSTREAD, even though they were
		 * already invalidated at PREREAD time.  Aggressive prefetching
		 * due to accesses to other data near the dma buffer could have
		 * brought buffer data into the caches which is now stale.  The
		 * caches are invalidated from the outermost to innermost; the
		 * prefetches could be happening right now, and if L1 were
		 * invalidated first, stale L2 data could be prefetched into L1.
		 */
		if (op & BUS_DMASYNC_POSTREAD) {
			while (bpage != NULL) {
				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
					dcache_inv_poc(bpage->vaddr,
					    bpage->busaddr, bpage->datacount);
				tempvaddr = 0;
				datavaddr = bpage->datavaddr;
				if (datavaddr == 0) {
					tempvaddr = pmap_quick_enter_page(
					    bpage->datapage);
					datavaddr = tempvaddr | bpage->dataoffs;
				}
				bcopy((void *)bpage->vaddr, (void *)datavaddr,
				    bpage->datacount);
				if (tempvaddr != 0)
					pmap_quick_remove_page(tempvaddr);
				bpage = STAILQ_NEXT(bpage, links);
			}
			dmat->bounce_zone->total_bounced++;
		}
	}

	/*
	 * For COHERENT memory no cache maintenance is necessary, but ensure all
	 * writes have reached memory for the PREWRITE case.  No action is
	 * needed for a PREREAD without PREWRITE also set, because that would
	 * imply that the cpu had written to the COHERENT buffer and expected
	 * the dma device to see that change, and by definition a PREWRITE sync
	 * is required to make that happen.
	 */
	if (map->flags & DMAMAP_COHERENT) {
		if (op & BUS_DMASYNC_PREWRITE) {
			dsb();
			if ((dmat->flags & BUS_DMA_COHERENT) == 0)
				cpu_l2cache_drain_writebuf();
		}
		return;
	}

	/*
	 * Cache maintenance for normal (non-COHERENT non-bounce) buffers.  All
	 * the comments about the sequences for flushing cache levels in the
	 * bounce buffer code above apply here as well.  In particular, the fact
	 * that the sequence is inner-to-outer for PREREAD invalidation and
	 * outer-to-inner for POSTREAD invalidation is not a mistake.
	 */
	if (map->sync_count != 0) {
		sl = &map->slist[0];
		end = &map->slist[map->sync_count];
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
		    "performing sync", __func__, dmat, dmat->flags, op);

		for ( ; sl != end; ++sl)
			dma_dcache_sync(sl, op);
	}
}