Lines Matching +full:dma +full:- +full:coherent

1 // SPDX-License-Identifier: GPL-2.0-only
3 * linux/arch/arm/mm/dma-mapping.c
5 * Copyright (C) 2000-2004 Russell King
7 * DMA uncached mapping support.
17 #include <linux/dma-direct.h>
18 #include <linux/dma-map-ops.h>
33 #include <asm/dma-iommu.h>
36 #include <xen/swiotlb-xen.h>
38 #include "dma.h"
60 #define COHERENT 1
84 if (buf->virt == virt) { in arm_dma_buffer_find()
85 list_del(&buf->list); in arm_dma_buffer_find()
95 * The DMA API is built upon the notion of "buffer ownership". A buffer
97 * by it) or exclusively owned by the DMA device. These helper functions
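The ownership hand-off these helpers implement can be illustrated with a short, hedged driver-side sketch (my_rx_complete and process_packet are hypothetical names; the streaming mapping is assumed to exist already): the CPU must take ownership back before reading a DMA_FROM_DEVICE buffer and return it before the device may write again.

    #include <linux/dma-mapping.h>

    void process_packet(void *buf, size_t len);    /* hypothetical consumer */

    /* Reuse one streaming mapping across transfers. */
    static void my_rx_complete(struct device *dev, dma_addr_t handle,
                               void *cpu_buf, size_t len)
    {
            /* Buffer ownership moves to the CPU before we look at the data. */
            dma_sync_single_for_cpu(dev, handle, len, DMA_FROM_DEVICE);

            process_packet(cpu_buf, len);

            /* Ownership goes back to the device for the next transfer. */
            dma_sync_single_for_device(dev, handle, len, DMA_FROM_DEVICE);
    }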
112 * arm_dma_map_page - map a portion of a page for streaming DMA
113 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
117 * @dir: DMA transfer direction
142 * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
143 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
144 * @handle: DMA address of buffer
146 * @dir: DMA transfer direction (same as passed to dma_map_page)
148 * Unmap a page streaming mode DMA translation. The handle and size
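As a hedged usage sketch of the streaming page API documented above (my_map_one_page and start_device_transfer are hypothetical), a driver maps a page, checks for a mapping error, and unmaps it once the transfer is done:

    #include <linux/dma-mapping.h>

    void start_device_transfer(struct device *dev, dma_addr_t addr, size_t len); /* hypothetical */

    static int my_map_one_page(struct device *dev, struct page *page)
    {
            dma_addr_t handle;

            handle = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
            if (dma_mapping_error(dev, handle))
                    return -ENOMEM;

            start_device_transfer(dev, handle, PAGE_SIZE);

            dma_unmap_page(dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
            return 0;
    }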
166 unsigned int offset = handle & (PAGE_SIZE - 1); in arm_dma_sync_single_for_cpu()
167 struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); in arm_dma_sync_single_for_cpu()
174 unsigned int offset = handle & (PAGE_SIZE - 1); in arm_dma_sync_single_for_device()
175 struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); in arm_dma_sync_single_for_device()
180 * Return whether the given device DMA address mask can be supported
181 * properly. For example, if your device can only drive the low 24-bits
187 unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit); in arm_dma_supported()
190 * Translate the device's DMA mask to a PFN limit. This in arm_dma_supported()
191 * PFN number includes the page which we can DMA to. in arm_dma_supported()
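The mask being checked here is the one a driver declares at probe time. A minimal, hypothetical probe fragment (my_probe is not from this file) for a device that can only drive 32 address bits:

    #include <linux/dma-mapping.h>
    #include <linux/platform_device.h>

    static int my_probe(struct platform_device *pdev)
    {
            /* The arch helper above decides whether this mask can reach
             * the memory the kernel may hand out for DMA. */
            return dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
    }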
244 * lurking in the kernel direct-mapped region is invalidated. in __dma_clear_buffer()
252 if (coherent_flag != COHERENT) in __dma_clear_buffer()
256 size -= PAGE_SIZE; in __dma_clear_buffer()
258 if (coherent_flag != COHERENT) in __dma_clear_buffer()
263 if (coherent_flag != COHERENT) { in __dma_clear_buffer()
271 * Allocate a DMA buffer for 'dev' of size 'size' using the
297 * Free a DMA buffer. 'size' must be page aligned.
331 * Initialise the coherent pool for atomic allocations.
340 atomic_pool = gen_pool_create(PAGE_SHIFT, -1); in atomic_pool_init()
344 * The atomic pool is only used for non-coherent allocations in atomic_pool_init()
359 atomic_pool_size, -1); in atomic_pool_init()
366 pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n", in atomic_pool_init()
375 pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", in atomic_pool_init()
377 return -ENOMEM; in atomic_pool_init()
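The atomic pool exists to back coherent allocations made where sleeping is not allowed. A hedged sketch of such a caller (my_atomic_buf is hypothetical), e.g. run from tasklet context:

    #include <linux/dma-mapping.h>
    #include <linux/sizes.h>

    static void *my_atomic_buf(struct device *dev, dma_addr_t *handle)
    {
            /* GFP_ATOMIC forbids blocking, so on a non-coherent ARM system
             * this is expected to come from the preallocated pool above. */
            return dma_alloc_coherent(dev, SZ_4K, handle, GFP_ATOMIC);
    }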
416 map.length = end - start; in dma_contiguous_remap()
420 * Clear previous low-memory mapping to ensure that the in dma_contiguous_remap()
465 * non-coherent in __alloc_remap_buffer()
490 WARN(1, "coherent pool not initialised!\n"); in __alloc_from_pool()
579 /* __alloc_simple_buffer is only called when the device is coherent */ in __alloc_simple_buffer()
580 page = __dma_alloc_buffer(dev, size, gfp, COHERENT); in __alloc_simple_buffer()
591 return __alloc_simple_buffer(args->dev, args->size, args->gfp, in simple_allocator_alloc()
597 __dma_free_buffer(args->page, args->size); in simple_allocator_free()
608 return __alloc_from_contiguous(args->dev, args->size, args->prot, in cma_allocator_alloc()
609 ret_page, args->caller, in cma_allocator_alloc()
610 args->want_vaddr, args->coherent_flag, in cma_allocator_alloc()
611 args->gfp); in cma_allocator_alloc()
616 __free_from_contiguous(args->dev, args->page, args->cpu_addr, in cma_allocator_free()
617 args->size, args->want_vaddr); in cma_allocator_free()
628 return __alloc_from_pool(args->size, ret_page); in pool_allocator_alloc()
633 __free_from_pool(args->cpu_addr, args->size); in pool_allocator_free()
644 return __alloc_remap_buffer(args->dev, args->size, args->gfp, in remap_allocator_alloc()
645 args->prot, ret_page, args->caller, in remap_allocator_alloc()
646 args->want_vaddr); in remap_allocator_alloc()
651 if (args->want_vaddr) in remap_allocator_free()
652 dma_common_free_remap(args->cpu_addr, args->size); in remap_allocator_free()
654 __dma_free_buffer(args->page, args->size); in remap_allocator_free()
666 u64 mask = min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); in __dma_alloc()
678 .coherent_flag = is_coherent ? COHERENT : NORMAL, in __dma_alloc()
684 dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", in __dma_alloc()
699 * Following is a work-around (a.k.a. hack) to prevent pages in __dma_alloc()
713 buf->allocator = &cma_allocator; in __dma_alloc()
715 buf->allocator = &simple_allocator; in __dma_alloc()
717 buf->allocator = &remap_allocator; in __dma_alloc()
719 buf->allocator = &pool_allocator; in __dma_alloc()
721 addr = buf->allocator->alloc(&args, &page); in __dma_alloc()
727 buf->virt = args.want_vaddr ? addr : page; in __dma_alloc()
730 list_add(&buf->list, &arm_dma_bufs); in __dma_alloc()
740 * Allocate DMA-coherent memory space and return both the kernel remapped
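From a driver's point of view this allocator sits behind dma_alloc_coherent(). A hedged usage sketch (my_alloc_ring/my_free_ring are hypothetical) allocating a descriptor ring and handing its bus address to the device:

    #include <linux/dma-mapping.h>

    static void *my_alloc_ring(struct device *dev, size_t size,
                               dma_addr_t *ring_dma)
    {
            return dma_alloc_coherent(dev, size, ring_dma, GFP_KERNEL);
    }

    static void my_free_ring(struct device *dev, size_t size,
                             void *ring, dma_addr_t ring_dma)
    {
            dma_free_coherent(dev, size, ring, ring_dma);
    }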
763 int ret = -ENXIO; in __arm_dma_mmap()
767 unsigned long off = vma->vm_pgoff; in __arm_dma_mmap()
772 if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { in __arm_dma_mmap()
773 ret = remap_pfn_range(vma, vma->vm_start, in __arm_dma_mmap()
775 vma->vm_end - vma->vm_start, in __arm_dma_mmap()
776 vma->vm_page_prot); in __arm_dma_mmap()
783 * Create userspace mapping for the DMA-coherent memory.
796 vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); in arm_dma_mmap()
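A driver typically reaches this path through dma_mmap_coherent() from its mmap file operation. A hedged sketch, with struct my_ctx and its fields purely hypothetical:

    #include <linux/dma-mapping.h>
    #include <linux/fs.h>
    #include <linux/mm.h>

    struct my_ctx {
            struct device *dev;
            void *buf;            /* from dma_alloc_coherent() */
            dma_addr_t buf_dma;
    };

    static int my_mmap(struct file *file, struct vm_area_struct *vma)
    {
            struct my_ctx *ctx = file->private_data;

            return dma_mmap_coherent(ctx->dev, vma, ctx->buf, ctx->buf_dma,
                                     vma->vm_end - vma->vm_start);
    }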
821 buf->allocator->free(&args); in __arm_dma_free()
847 return -ENXIO; in arm_dma_get_sgtable()
855 sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); in arm_dma_get_sgtable()
883 len = PAGE_SIZE - offset; in dma_cache_maint_page()
902 left -= len; in dma_cache_maint_page()
910 * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
925 /* FIXME: non-speculating: flush on bidirectional mappings? */ in __dma_page_cpu_to_dev()
933 /* FIXME: non-speculating: not required */ in __dma_page_dev_to_cpu()
934 /* in any case, don't bother invalidating if DMA to device */ in __dma_page_dev_to_cpu()
942 * Mark the D-cache clean for these pages to avoid extra flushing. in __dma_page_dev_to_cpu()
952 left -= PAGE_SIZE - off; in __dma_page_dev_to_cpu()
956 set_bit(PG_dcache_clean, &page->flags); in __dma_page_dev_to_cpu()
957 left -= PAGE_SIZE; in __dma_page_dev_to_cpu()
963 * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA
964 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
967 * @dir: DMA transfer direction
969 * Map a set of buffers described by scatterlist in streaming mode for DMA.
970 * This is the scatter-gather version of the dma_map_single interface.
972 * appropriate dma address and length. They are obtained via
987 s->dma_length = s->length; in arm_dma_map_sg()
989 s->dma_address = ops->map_page(dev, sg_page(s), s->offset, in arm_dma_map_sg()
990 s->length, dir, attrs); in arm_dma_map_sg()
991 if (dma_mapping_error(dev, s->dma_address)) in arm_dma_map_sg()
998 ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); in arm_dma_map_sg()
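A hedged caller-side sketch of this scatter-gather path (my_map_request and queue_descriptor are hypothetical): map the list, program each segment via sg_dma_address()/sg_dma_len(), and unmap when the I/O completes.

    #include <linux/dma-mapping.h>
    #include <linux/scatterlist.h>

    void queue_descriptor(dma_addr_t addr, unsigned int len);  /* hypothetical */

    static int my_map_request(struct device *dev, struct scatterlist *sgl,
                              int nents)
    {
            struct scatterlist *sg;
            int i, count;

            count = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
            if (!count)
                    return -EIO;

            for_each_sg(sgl, sg, count, i)
                    queue_descriptor(sg_dma_address(sg), sg_dma_len(sg));

            /* ... once the hardware signals completion ... */
            dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
            return 0;
    }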
1003 * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
1004 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
1007 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1009 * Unmap a set of streaming mode DMA translations. Again, CPU access
1021 ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); in arm_dma_unmap_sg()
1026 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
1029 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1039 ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length, in arm_dma_sync_sg_for_cpu()
1045 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
1048 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1058 ops->sync_single_for_device(dev, sg_dma_address(s), s->length, in arm_dma_sync_sg_for_device()
1062 static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent) in arm_get_dma_map_ops()
1066 * 32-bits, which then can't be addressed by devices that only support in arm_get_dma_map_ops()
1067 * 32-bit DMA. in arm_get_dma_map_ops()
1068 * Use the generic dma-direct / swiotlb ops code in that case, as that in arm_get_dma_map_ops()
1073 return coherent ? &arm_coherent_dma_ops : &arm_dma_ops; in arm_get_dma_map_ops()
1107 size_t mapping_size = mapping->bits << PAGE_SHIFT; in __alloc_iova()
1116 align = (1 << order) - 1; in __alloc_iova()
1118 spin_lock_irqsave(&mapping->lock, flags); in __alloc_iova()
1119 for (i = 0; i < mapping->nr_bitmaps; i++) { in __alloc_iova()
1120 start = bitmap_find_next_zero_area(mapping->bitmaps[i], in __alloc_iova()
1121 mapping->bits, 0, count, align); in __alloc_iova()
1123 if (start > mapping->bits) in __alloc_iova()
1126 bitmap_set(mapping->bitmaps[i], start, count); in __alloc_iova()
1135 if (i == mapping->nr_bitmaps) { in __alloc_iova()
1137 spin_unlock_irqrestore(&mapping->lock, flags); in __alloc_iova()
1141 start = bitmap_find_next_zero_area(mapping->bitmaps[i], in __alloc_iova()
1142 mapping->bits, 0, count, align); in __alloc_iova()
1144 if (start > mapping->bits) { in __alloc_iova()
1145 spin_unlock_irqrestore(&mapping->lock, flags); in __alloc_iova()
1149 bitmap_set(mapping->bitmaps[i], start, count); in __alloc_iova()
1151 spin_unlock_irqrestore(&mapping->lock, flags); in __alloc_iova()
1153 iova = mapping->base + (mapping_size * i); in __alloc_iova()
1163 size_t mapping_size = mapping->bits << PAGE_SHIFT; in __free_iova()
1171 bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size; in __free_iova()
1172 BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions); in __free_iova()
1174 bitmap_base = mapping->base + mapping_size * bitmap_index; in __free_iova()
1176 start = (addr - bitmap_base) >> PAGE_SHIFT; in __free_iova()
1189 spin_lock_irqsave(&mapping->lock, flags); in __free_iova()
1190 bitmap_clear(mapping->bitmaps[bitmap_index], start, count); in __free_iova()
1191 spin_unlock_irqrestore(&mapping->lock, flags); in __free_iova()
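To make the bitmap scheme above concrete, here is a toy, stand-alone illustration (not the kernel's code): each set bit stands for one allocated page inside a per-mapping window, and the bit index is converted back to a bus address.

    #include <linux/bitmap.h>
    #include <linux/dma-mapping.h>

    static dma_addr_t toy_alloc_iova(unsigned long *bitmap, unsigned int bits,
                                     dma_addr_t base, unsigned int count,
                                     unsigned long align_mask)
    {
            unsigned long start;

            start = bitmap_find_next_zero_area(bitmap, bits, 0, count, align_mask);
            if (start > bits)
                    return DMA_MAPPING_ERROR;

            bitmap_set(bitmap, start, count);
            return base + ((dma_addr_t)start << PAGE_SHIFT);
    }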
1234 order_idx = ARRAY_SIZE(iommu_order_array) - 1; in __iommu_alloc_buffer()
1253 /* See if it's easy to allocate a high-order chunk */ in __iommu_alloc_buffer()
1270 while (--j) in __iommu_alloc_buffer()
1276 count -= 1 << order; in __iommu_alloc_buffer()
1281 while (i--) in __iommu_alloc_buffer()
1334 len = (j - i) << PAGE_SHIFT; in __iommu_create_mapping()
1335 ret = iommu_map(mapping->domain, iova, phys, len, in __iommu_create_mapping()
1344 iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); in __iommu_create_mapping()
1354 * add optional in-page offset from iova to size and align in __iommu_remove_mapping()
1360 iommu_unmap(mapping->domain, iova, size); in __iommu_remove_mapping()
1394 if (coherent_flag == COHERENT) in __iommu_alloc_simple()
1416 if (coherent_flag == COHERENT) in __iommu_free_atomic()
1433 if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp)) in __arm_iommu_alloc_attrs()
1438 * Following is a work-around (a.k.a. hack) to prevent pages in __arm_iommu_alloc_attrs()
1480 return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, COHERENT); in arm_coherent_iommu_alloc_attrs()
1492 return -ENXIO; in __arm_iommu_mmap_attrs()
1494 if (vma->vm_pgoff >= nr_pages) in __arm_iommu_mmap_attrs()
1495 return -ENXIO; in __arm_iommu_mmap_attrs()
1507 vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); in arm_iommu_mmap_attrs()
1529 if (coherent_flag == COHERENT || __in_atomic_pool(cpu_addr, size)) { in __arm_iommu_free_attrs()
1536 WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); in __arm_iommu_free_attrs()
1557 __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT); in arm_coherent_iommu_free_attrs()
1568 return -ENXIO; in arm_iommu_get_sgtable()
1575 * Map a part of the scatter-gather list into contiguous io address space
1594 return -ENOMEM; in __map_sg_chunk()
1598 unsigned int len = PAGE_ALIGN(s->offset + s->length); in __map_sg_chunk()
1601 __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); in __map_sg_chunk()
1605 ret = iommu_map(mapping->domain, iova, phys, len, prot); in __map_sg_chunk()
1615 iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); in __map_sg_chunk()
1624 struct scatterlist *s = sg, *dma = sg, *start = sg; in __iommu_map_sg()
1626 unsigned int offset = s->offset; in __iommu_map_sg()
1627 unsigned int size = s->offset + s->length; in __iommu_map_sg()
1633 s->dma_address = DMA_MAPPING_ERROR; in __iommu_map_sg()
1634 s->dma_length = 0; in __iommu_map_sg()
1636 if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { in __iommu_map_sg()
1637 if (__map_sg_chunk(dev, start, size, &dma->dma_address, in __iommu_map_sg()
1641 dma->dma_address += offset; in __iommu_map_sg()
1642 dma->dma_length = size - offset; in __iommu_map_sg()
1644 size = offset = s->offset; in __iommu_map_sg()
1646 dma = sg_next(dma); in __iommu_map_sg()
1649 size += s->length; in __iommu_map_sg()
1651 if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, in __iommu_map_sg()
1655 dma->dma_address += offset; in __iommu_map_sg()
1656 dma->dma_length = size - offset; in __iommu_map_sg()
1667 * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA
1671 * @dir: DMA transfer direction
1673 * Map a set of i/o coherent buffers described by scatterlist in streaming
1674 * mode for DMA. The scatter gather list elements are merged together (if
1675 * possible) and tagged with the appropriate dma address and length. They are
1685 * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
1689 * @dir: DMA transfer direction
1691 * Map a set of buffers described by scatterlist in streaming mode for DMA.
1693 * tagged with the appropriate dma address and length. They are obtained via
1714 __dma_page_dev_to_cpu(sg_page(s), s->offset, in __iommu_unmap_sg()
1715 s->length, dir); in __iommu_unmap_sg()
1720 * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
1724 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1726 * Unmap a set of streaming mode DMA translations. Again, CPU access
1737 * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
1741 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1743 * Unmap a set of streaming mode DMA translations. Again, CPU access
1759 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1769 __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); in arm_iommu_sync_sg_for_cpu()
1778 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1788 __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); in arm_iommu_sync_sg_for_device()
1798 * @dir: DMA transfer direction
1800 * Coherent IOMMU aware version of arm_dma_map_page()
1816 ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); in arm_coherent_iommu_map_page()
1832 * @dir: DMA transfer direction
1849 * @handle: DMA address of buffer
1851 * @dir: DMA transfer direction (same as passed to dma_map_page)
1853 * Coherent IOMMU aware version of arm_dma_unmap_page()
1866 iommu_unmap(mapping->domain, iova, len); in arm_coherent_iommu_unmap_page()
1873 * @handle: DMA address of buffer
1875 * @dir: DMA transfer direction (same as passed to dma_map_page)
1884 struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); in arm_iommu_unmap_page()
1894 iommu_unmap(mapping->domain, iova, len); in arm_iommu_unmap_page()
1899 * arm_iommu_map_resource - map a device resource for DMA
1903 * @dir: DMA transfer direction
1922 ret = iommu_map(mapping->domain, dma_addr, addr, len, prot); in arm_iommu_map_resource()
1933 * arm_iommu_unmap_resource - unmap a device DMA resource
1935 * @dma_handle: DMA address to resource
1937 * @dir: DMA transfer direction
1951 iommu_unmap(mapping->domain, iova, len); in arm_iommu_unmap_resource()
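These resource mappings are reached through the generic dma_map_resource() call. A hedged sketch (my_map_fifo is hypothetical; res describes the target peripheral's MMIO FIFO) that makes the register window addressable by a DMA engine sitting behind the IOMMU:

    #include <linux/dma-mapping.h>
    #include <linux/ioport.h>

    static dma_addr_t my_map_fifo(struct device *dma_dev, struct resource *res)
    {
            /* Caller must check the result with dma_mapping_error(). */
            return dma_map_resource(dma_dev, res->start, resource_size(res),
                                    DMA_BIDIRECTIONAL, 0);
    }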
1960 struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); in arm_iommu_sync_single_for_cpu()
1974 struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); in arm_iommu_sync_single_for_device()
2043 int err = -ENOMEM; in arm_iommu_create_mapping()
2045 /* currently only 32-bit DMA address space is supported */ in arm_iommu_create_mapping()
2047 return ERR_PTR(-ERANGE); in arm_iommu_create_mapping()
2050 return ERR_PTR(-EINVAL); in arm_iommu_create_mapping()
2061 mapping->bitmap_size = bitmap_size; in arm_iommu_create_mapping()
2062 mapping->bitmaps = kcalloc(extensions, sizeof(unsigned long *), in arm_iommu_create_mapping()
2064 if (!mapping->bitmaps) in arm_iommu_create_mapping()
2067 mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL); in arm_iommu_create_mapping()
2068 if (!mapping->bitmaps[0]) in arm_iommu_create_mapping()
2071 mapping->nr_bitmaps = 1; in arm_iommu_create_mapping()
2072 mapping->extensions = extensions; in arm_iommu_create_mapping()
2073 mapping->base = base; in arm_iommu_create_mapping()
2074 mapping->bits = BITS_PER_BYTE * bitmap_size; in arm_iommu_create_mapping()
2076 spin_lock_init(&mapping->lock); in arm_iommu_create_mapping()
2078 mapping->domain = iommu_domain_alloc(bus); in arm_iommu_create_mapping()
2079 if (!mapping->domain) in arm_iommu_create_mapping()
2082 kref_init(&mapping->kref); in arm_iommu_create_mapping()
2085 kfree(mapping->bitmaps[0]); in arm_iommu_create_mapping()
2087 kfree(mapping->bitmaps); in arm_iommu_create_mapping()
2101 iommu_domain_free(mapping->domain); in release_iommu_mapping()
2102 for (i = 0; i < mapping->nr_bitmaps; i++) in release_iommu_mapping()
2103 kfree(mapping->bitmaps[i]); in release_iommu_mapping()
2104 kfree(mapping->bitmaps); in release_iommu_mapping()
2112 if (mapping->nr_bitmaps >= mapping->extensions) in extend_iommu_mapping()
2113 return -EINVAL; in extend_iommu_mapping()
2115 next_bitmap = mapping->nr_bitmaps; in extend_iommu_mapping()
2116 mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size, in extend_iommu_mapping()
2118 if (!mapping->bitmaps[next_bitmap]) in extend_iommu_mapping()
2119 return -ENOMEM; in extend_iommu_mapping()
2121 mapping->nr_bitmaps++; in extend_iommu_mapping()
2129 kref_put(&mapping->kref, release_iommu_mapping); in arm_iommu_release_mapping()
2138 err = iommu_attach_device(mapping->domain, dev); in __arm_iommu_attach_device()
2142 kref_get(&mapping->kref); in __arm_iommu_attach_device()
2156 * This replaces the dma operations (dma_map_ops pointer) with the
2181 * This overwrites the dma_ops pointer with appropriate non-IOMMU ops.
2193 iommu_detach_device(mapping->domain, dev); in arm_iommu_detach_device()
2194 kref_put(&mapping->kref, release_iommu_mapping); in arm_iommu_detach_device()
2196 set_dma_ops(dev, arm_get_dma_map_ops(dev->archdata.dma_coherent)); in arm_iommu_detach_device()
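Putting the exported helpers together, a hedged lifecycle sketch (my_attach/my_detach are hypothetical): create a 128 MiB IOVA window, attach the device so it picks up the IOMMU dma_map_ops, and undo both steps on teardown.

    #include <linux/err.h>
    #include <linux/sizes.h>
    #include <asm/dma-iommu.h>

    static int my_attach(struct device *dev, struct dma_iommu_mapping **out)
    {
            struct dma_iommu_mapping *mapping;
            int err;

            mapping = arm_iommu_create_mapping(dev->bus, 0x10000000, SZ_128M);
            if (IS_ERR(mapping))
                    return PTR_ERR(mapping);

            err = arm_iommu_attach_device(dev, mapping);
            if (err) {
                    arm_iommu_release_mapping(mapping);
                    return err;
            }

            *out = mapping;
            return 0;
    }

    static void my_detach(struct device *dev, struct dma_iommu_mapping *mapping)
    {
            arm_iommu_detach_device(dev);
            arm_iommu_release_mapping(mapping);
    }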
2202 static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent) in arm_get_iommu_dma_map_ops()
2204 return coherent ? &iommu_coherent_ops : &iommu_ops; in arm_get_iommu_dma_map_ops()
2215 mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); in arm_setup_iommu_dma_ops()
2217 pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", in arm_setup_iommu_dma_ops()
2258 const struct iommu_ops *iommu, bool coherent) in arch_setup_dma_ops()
2262 dev->archdata.dma_coherent = coherent; in arch_setup_dma_ops()
2264 dev->dma_coherent = coherent; in arch_setup_dma_ops()
2272 if (dev->dma_ops) in arch_setup_dma_ops()
2276 dma_ops = arm_get_iommu_dma_map_ops(coherent); in arch_setup_dma_ops()
2278 dma_ops = arm_get_dma_map_ops(coherent); in arch_setup_dma_ops()
2284 dev->dma_ops = &xen_swiotlb_dma_ops; in arch_setup_dma_ops()
2286 dev->archdata.dma_ops_setup = true; in arch_setup_dma_ops()
2291 if (!dev->archdata.dma_ops_setup) in arch_teardown_dma_ops()
2295 /* Let arch_setup_dma_ops() start again from scratch upon re-probe */ in arch_teardown_dma_ops()
2303 __dma_page_cpu_to_dev(phys_to_page(paddr), paddr & (PAGE_SIZE - 1), in arch_sync_dma_for_device()
2310 __dma_page_dev_to_cpu(phys_to_page(paddr), paddr & (PAGE_SIZE - 1), in arch_sync_dma_for_cpu()