From: Akinobu Mita Date: Wed, 4 Jun 2014 23:06:50 +0000 (-0700) Subject: x86: enable DMA CMA with swiotlb X-Git-Tag: firefly_0821_release~176^2~3785^2~16^2~221 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9c5a3621427da68afe6a078cadf807d2c8cc1d12;p=firefly-linux-kernel-4.4.55.git x86: enable DMA CMA with swiotlb The DMA Contiguous Memory Allocator support on x86 is disabled when swiotlb config option is enabled. So DMA CMA is always disabled on x86_64 because swiotlb is always enabled. This attempts to support for DMA CMA with enabling swiotlb config option. The contiguous memory allocator on x86 is integrated in the function dma_generic_alloc_coherent() which is .alloc callback in nommu_dma_ops for dma_alloc_coherent(). x86_swiotlb_alloc_coherent() which is .alloc callback in swiotlb_dma_ops tries to allocate with dma_generic_alloc_coherent() firstly and then swiotlb_alloc_coherent() is called as a fallback. The main part of supporting DMA CMA with swiotlb is that changing x86_swiotlb_free_coherent() which is .free callback in swiotlb_dma_ops for dma_free_coherent() so that it can distinguish memory allocated by dma_generic_alloc_coherent() from one allocated by swiotlb_alloc_coherent() and release it with dma_generic_free_coherent() which can handle contiguous memory. This change requires making is_swiotlb_buffer() global function. This also needs to change .free callback in the dma_map_ops for amd_gart and sta2x11, because these dma_ops are also using dma_generic_alloc_coherent(). Signed-off-by: Akinobu Mita Acked-by: Marek Szyprowski Acked-by: Konrad Rzeszutek Wilk Cc: David Woodhouse Cc: Don Dutile Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Andi Kleen Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 896a411a4584..4a0137f6f032 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -41,7 +41,7 @@ config X86 select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS - select HAVE_DMA_CONTIGUOUS if !SWIOTLB + select HAVE_DMA_CONTIGUOUS select HAVE_KRETPROBES select GENERIC_EARLY_IOREMAP select HAVE_OPTPROBES diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 977f1761a25d..ab05d73e2bb7 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -29,4 +29,11 @@ static inline void pci_swiotlb_late_init(void) static inline void dma_mark_clean(void *addr, size_t size) {} +extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs); +extern void x86_swiotlb_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + struct dma_attrs *attrs); + #endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index b574b295a2f9..8e3842fc8bea 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -512,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, struct dma_attrs *attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); - free_pages((unsigned long)vaddr, get_order(size)); + dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); } static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6c483ba98b9c..77dd0ad58be4 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -14,7 +14,7 @@ #include int swiotlb __read_mostly; -static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, +void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) { @@ -28,11 +28,14 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); } -static void x86_swiotlb_free_coherent(struct device *dev, size_t size, +void x86_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, struct dma_attrs *attrs) { - swiotlb_free_coherent(dev, size, vaddr, dma_addr); + if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr))) + swiotlb_free_coherent(dev, size, vaddr, dma_addr); + else + dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); } static struct dma_map_ops swiotlb_dma_ops = { diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 9d8a509c9730..5ceda85b8687 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -173,9 +173,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev, { void *vaddr; - vaddr = dma_generic_alloc_coherent(dev, size, dma_handle, flags, attrs); - if (!vaddr) - vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, flags); + vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs); *dma_handle = p2a(*dma_handle, to_pci_dev(dev)); return vaddr; } @@ -183,7 +181,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev, /* We have our own dma_ops: the same as swiotlb but from alloc (above) */ static struct dma_map_ops sta2x11_dma_ops = { .alloc = sta2x11_swiotlb_alloc_coherent, - .free = swiotlb_free_coherent, + .free = x86_swiotlb_free_coherent, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, .map_sg = swiotlb_map_sg_attrs, diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index a5ffd32642fd..e7a018eaf3a2 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -116,4 +116,6 @@ static inline void swiotlb_free(void) { } #endif extern void swiotlb_print_info(void); +extern int is_swiotlb_buffer(phys_addr_t paddr); + #endif /* __LINUX_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index b604b831f4d1..649d097853a1 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -374,7 +374,7 @@ void __init swiotlb_free(void) io_tlb_nslabs = 0; } -static int is_swiotlb_buffer(phys_addr_t paddr) +int is_swiotlb_buffer(phys_addr_t paddr) { return paddr >= io_tlb_start && paddr < io_tlb_end; }