// SPDX-License-Identifier: GPL-2.0-only
/*
 * SWIOTLB-based DMA API implementation
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */

#include <linux/gfp.h>
#include <linux/acpi.h>
#include <linux/memblock.h>
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>
#include <linux/pci.h>

#include <asm/cacheflush.h>

pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
		unsigned long attrs)
{
	if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE))
		return pgprot_writecombine(prot);
	return prot;
}

void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	__dma_map_area(phys_to_virt(paddr), size, dir);
}

void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	__dma_unmap_area(phys_to_virt(paddr), size, dir);
}

void arch_dma_prep_coherent(struct page *page, size_t size)
{
	__dma_flush_area(page_address(page), size);
}

#ifdef CONFIG_IOMMU_DMA
static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
				      struct page *page, size_t size)
{
	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

	if (!ret)
		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);

	return ret;
}

static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
			      unsigned long pfn, size_t size)
{
	int ret = -ENXIO;
	unsigned long nr_vma_pages = vma_pages(vma);
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long off = vma->vm_pgoff;

	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      pfn + off,
				      vma->vm_end - vma->vm_start,
				      vma->vm_page_prot);
	}

	return ret;
}
#endif /* CONFIG_IOMMU_DMA */

static int __init arm64_dma_init(void)
{
	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
		   TAINT_CPU_OUT_OF_SPEC,
		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
		   ARCH_DMA_MINALIGN, cache_line_size());
	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
}
arch_initcall(arm64_dma_init);
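/*
 * Example (illustrative sketch, not part of the original file): how the
 * arch hooks above are reached from a driver.  On a non-coherent device,
 * dma_map_single() ends up cleaning the CPU caches to the point of
 * coherency (arch_sync_dma_for_device() / __dma_map_area()), and
 * dma_unmap_single() performs the CPU-side maintenance on the way back.
 * "example_streaming_tx" and the 512-byte length are assumptions made
 * purely for illustration.
 */
static int __maybe_unused example_streaming_tx(struct device *dev, void *buf)
{
	dma_addr_t dma;

	/* Cleans "buf" to the point of coherency on non-coherent devices. */
	dma = dma_map_single(dev, buf, 512, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... program the device with "dma" and wait for completion ... */

	/* Unmap must balance the map; no invalidate needed for TO_DEVICE. */
	dma_unmap_single(dev, dma, 512, DMA_TO_DEVICE);
	return 0;
}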
#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>

/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
	__dma_flush_area(virt, PAGE_SIZE);
}

static void *__iommu_alloc_attrs(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp,
				 unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	size_t iosize = size;
	void *addr;

	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
		return NULL;

	size = PAGE_ALIGN(size);

	/*
	 * Some drivers rely on this, and we probably don't want the
	 * possibility of stale kernel data being read by devices anyway.
	 */
	gfp |= __GFP_ZERO;

	if (!gfpflags_allow_blocking(gfp)) {
		struct page *page;
		/*
		 * In atomic context we can't remap anything, so we'll only
		 * get the virtually contiguous buffer we need by way of a
		 * physically contiguous allocation.
		 */
		if (coherent) {
			page = alloc_pages(gfp, get_order(size));
			addr = page ? page_address(page) : NULL;
		} else {
			addr = dma_alloc_from_pool(size, &page, gfp);
		}
		if (!addr)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
		if (*handle == DMA_MAPPING_ERROR) {
			if (coherent)
				__free_pages(page, get_order(size));
			else
				dma_free_from_pool(addr, size);
			addr = NULL;
		}
	} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
		struct page *page;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
					get_order(size), gfp & __GFP_NOWARN);
		if (!page)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
		if (*handle == DMA_MAPPING_ERROR) {
			dma_release_from_contiguous(dev, page,
						    size >> PAGE_SHIFT);
			return NULL;
		}
		addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
						   prot,
						   __builtin_return_address(0));
		if (addr) {
			if (!coherent)
				__dma_flush_area(page_to_virt(page), iosize);
			memset(addr, 0, size);
		} else {
			iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
			dma_release_from_contiguous(dev, page,
						    size >> PAGE_SHIFT);
		}
	} else {
		pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
		struct page **pages;

		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
					handle, flush_page);
		if (!pages)
			return NULL;

		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
					      __builtin_return_address(0));
		if (!addr)
			iommu_dma_free(dev, pages, iosize, handle);
	}
	return addr;
}
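/*
 * Example (illustrative sketch, not part of the original file): the CMA
 * branch above is reached by allocating with DMA_ATTR_FORCE_CONTIGUOUS.
 * "example_alloc_contig" and the 1 MiB size are assumptions.
 */
static __maybe_unused void *example_alloc_contig(struct device *dev,
						 dma_addr_t *handle)
{
	/* Physically contiguous, remapped and zeroed by the path above. */
	return dma_alloc_attrs(dev, 1024 * 1024, handle, GFP_KERNEL,
			       DMA_ATTR_FORCE_CONTIGUOUS);
}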
static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
			       dma_addr_t handle, unsigned long attrs)
{
	size_t iosize = size;

	size = PAGE_ALIGN(size);
	/*
	 * @cpu_addr will be one of 4 things depending on how it was allocated:
	 * - A remapped array of pages for contiguous allocations.
	 * - A remapped array of pages from iommu_dma_alloc(), for all
	 *   non-atomic allocations.
	 * - A non-cacheable alias from the atomic pool, for atomic
	 *   allocations by non-coherent devices.
	 * - A normal lowmem address, for atomic allocations by
	 *   coherent devices.
	 * Hence how dodgy the below logic looks...
	 */
	if (dma_in_atomic_pool(cpu_addr, size)) {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		dma_free_from_pool(cpu_addr, size);
	} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		struct page *page = vmalloc_to_page(cpu_addr);

		iommu_dma_unmap_page(dev, handle, iosize, 0, attrs);
		dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else if (is_vmalloc_addr(cpu_addr)) {
		struct vm_struct *area = find_vm_area(cpu_addr);

		if (WARN_ON(!area || !area->pages))
			return;
		iommu_dma_free(dev, area->pages, iosize, &handle);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_pages(virt_to_page(cpu_addr), get_order(size));
	}
}

static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
			      unsigned long attrs)
{
	struct vm_struct *area;
	int ret;

	vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);

	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (!is_vmalloc_addr(cpu_addr)) {
		unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
		return __swiotlb_mmap_pfn(vma, pfn, size);
	}

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		/*
		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
		 * hence in the vmalloc space.
		 */
		unsigned long pfn = vmalloc_to_pfn(cpu_addr);
		return __swiotlb_mmap_pfn(vma, pfn, size);
	}

	area = find_vm_area(cpu_addr);
	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
			       void *cpu_addr, dma_addr_t dma_addr,
			       size_t size, unsigned long attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct vm_struct *area = find_vm_area(cpu_addr);

	if (!is_vmalloc_addr(cpu_addr)) {
		struct page *page = virt_to_page(cpu_addr);
		return __swiotlb_get_sgtable_page(sgt, page, size);
	}

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		/*
		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
		 * hence in the vmalloc space.
		 */
		struct page *page = vmalloc_to_page(cpu_addr);
		return __swiotlb_get_sgtable_page(sgt, page, size);
	}

	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
					 GFP_KERNEL);
}
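/*
 * Example (illustrative sketch, not part of the original file): the
 * .mmap and .get_sgtable hooks above are what back dma_mmap_attrs() and
 * dma_get_sgtable(); a driver's mmap file operation might forward to
 * them like this.  All names here are assumptions.
 */
static int __maybe_unused example_mmap_coherent(struct device *dev,
						struct vm_area_struct *vma,
						void *cpu_addr,
						dma_addr_t handle, size_t size)
{
	/* Lands in __iommu_mmap_attrs() when iommu_dma_ops is installed. */
	return dma_mmap_attrs(dev, vma, cpu_addr, handle, size, 0);
}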
static void __iommu_sync_single_for_cpu(struct device *dev,
					dma_addr_t dev_addr, size_t size,
					enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (dev_is_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
	arch_sync_dma_for_cpu(dev, phys, size, dir);
}

static void __iommu_sync_single_for_device(struct device *dev,
					   dma_addr_t dev_addr, size_t size,
					   enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (dev_is_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
	arch_sync_dma_for_device(dev, phys, size, dir);
}

static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	int prot = dma_info_to_prot(dir, coherent, attrs);
	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    dev_addr != DMA_MAPPING_ERROR)
		__dma_map_area(page_address(page) + offset, size, dir);

	return dev_addr;
}

static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);

	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
}

static void __iommu_sync_sg_for_cpu(struct device *dev,
				    struct scatterlist *sgl, int nelems,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (dev_is_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
}

static void __iommu_sync_sg_for_device(struct device *dev,
				       struct scatterlist *sgl, int nelems,
				       enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (dev_is_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
}

static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				int nelems, enum dma_data_direction dir,
				unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);

	return iommu_dma_map_sg(dev, sgl, nelems,
				dma_info_to_prot(dir, coherent, attrs));
}

static void __iommu_unmap_sg_attrs(struct device *dev,
				   struct scatterlist *sgl, int nelems,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);

	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}

static const struct dma_map_ops iommu_dma_ops = {
	.alloc = __iommu_alloc_attrs,
	.free = __iommu_free_attrs,
	.mmap = __iommu_mmap_attrs,
	.get_sgtable = __iommu_get_sgtable,
	.map_page = __iommu_map_page,
	.unmap_page = __iommu_unmap_page,
	.map_sg = __iommu_map_sg_attrs,
	.unmap_sg = __iommu_unmap_sg_attrs,
	.sync_single_for_cpu = __iommu_sync_single_for_cpu,
	.sync_single_for_device = __iommu_sync_single_for_device,
	.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
	.sync_sg_for_device = __iommu_sync_sg_for_device,
	.map_resource = iommu_dma_map_resource,
	.unmap_resource = iommu_dma_unmap_resource,
};

static int __init __iommu_dma_init(void)
{
	return iommu_dma_init();
}
arch_initcall(__iommu_dma_init);
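/*
 * Example (illustrative sketch, not part of the original file): mapping
 * a scatterlist through the ops table above.  The IOMMU may coalesce
 * entries, so a driver must program the device with the returned count,
 * not the original nents.  "example_map_sg" is an assumed name.
 */
static int __maybe_unused example_map_sg(struct device *dev,
					 struct scatterlist *sgl, int nents)
{
	int count;

	/* Reaches __iommu_map_sg_attrs(), which also syncs CPU caches. */
	count = dma_map_sg(dev, sgl, nents, DMA_BIDIRECTIONAL);
	if (!count)
		return -ENOMEM;

	/* ... hand the first "count" entries to the device ... */

	/* Always unmap with the original nents, per the DMA API. */
	dma_unmap_sg(dev, sgl, nents, DMA_BIDIRECTIONAL);
	return 0;
}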
static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *ops)
{
	struct iommu_domain *domain;

	if (!ops)
		return;

	/*
	 * The IOMMU core code allocates the default DMA domain, which the
	 * underlying IOMMU driver needs to support via the dma-iommu layer.
	 */
	domain = iommu_get_domain_for_dev(dev);

	if (!domain)
		goto out_err;

	if (domain->type == IOMMU_DOMAIN_DMA) {
		if (iommu_dma_init_domain(domain, dma_base, size, dev))
			goto out_err;

		dev->dma_ops = &iommu_dma_ops;
	}

	return;

out_err:
	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
		dev_name(dev));
}

void arch_teardown_dma_ops(struct device *dev)
{
	dev->dma_ops = NULL;
}

#else

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *iommu)
{ }

#endif /* CONFIG_IOMMU_DMA */

void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	dev->dma_coherent = coherent;
	__iommu_setup_dma_ops(dev, dma_base, size, iommu);

#ifdef CONFIG_XEN
	if (xen_initial_domain())
		dev->dma_ops = xen_dma_ops;
#endif
}
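/*
 * Example (illustrative sketch, not part of the original file): bus code
 * such as the OF/ACPI DMA configuration helpers calls
 * arch_setup_dma_ops() once firmware has described the device; the
 * window below (32-bit at bus address 0, non-coherent) is a placeholder,
 * not a real configuration.
 */
static void __maybe_unused example_setup(struct device *dev,
					 const struct iommu_ops *ops)
{
	arch_setup_dma_ops(dev, 0, 1ULL << 32, ops, false);
}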