From 358cd9afd069992cf5170f02987011f51a163d1d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jun 2023 09:57:18 +0200 Subject: xen/pci: add flag for PCI passthrough being possible When running as a Xen PV guests passed through PCI devices only have a chance to work if the Xen supplied memory map has some PCI space reserved. Add a flag xen_pv_pci_possible which will be set in early boot in case the memory map has at least one area with the type E820_TYPE_RESERVED. Signed-off-by: Juergen Gross Signed-off-by: Christoph Hellwig --- include/xen/xen.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/xen/xen.h b/include/xen/xen.h index f989162983c3..a1e5b3f18d69 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -29,6 +29,12 @@ extern bool xen_pvh; extern uint32_t xen_start_flags; +#ifdef CONFIG_XEN_PV +extern bool xen_pv_pci_possible; +#else +#define xen_pv_pci_possible 0 +#endif + #include extern struct hvm_start_info pvh_start_info; void xen_prepare_pvh(void); -- cgit From 3d6f126b15d9fd8435455fffc912d976973a7a09 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 27 Jul 2023 14:25:42 +0200 Subject: dma-mapping: move arch_dma_set_mask() declaration to header This function has a __weak definition and an override that is only used on freescale powerpc chips. The powerpc definition however does not see the declaration that is in a .c file: arch/powerpc/kernel/dma-mask.c:7:6: error: no previous prototype for 'arch_dma_set_mask' [-Werror=missing-prototypes] Move it into the linux/dma-map-ops.h header where the other arch_dma_* functions are declared. Signed-off-by: Arnd Bergmann Signed-off-by: Christoph Hellwig --- arch/powerpc/kernel/dma-mask.c | 1 + include/linux/dma-map-ops.h | 6 ++++++ kernel/dma/mapping.c | 6 ------ 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/dma-mask.c b/arch/powerpc/kernel/dma-mask.c index ffbbbc432612..5b07ca7b73aa 100644 --- a/arch/powerpc/kernel/dma-mask.c +++ b/arch/powerpc/kernel/dma-mask.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 9bf19b5bf755..bb5e06fd359d 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -343,6 +343,12 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK +void arch_dma_set_mask(struct device *dev, u64 mask); +#else +#define arch_dma_set_mask(dev, mask) do { } while (0) +#endif + #ifdef CONFIG_MMU /* * Page protection so that devices that can't snoop CPU caches can use the diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 9a4db5cce600..e323ca48f7f2 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -760,12 +760,6 @@ bool dma_pci_p2pdma_supported(struct device *dev) } EXPORT_SYMBOL_GPL(dma_pci_p2pdma_supported); -#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK -void arch_dma_set_mask(struct device *dev, u64 mask); -#else -#define arch_dma_set_mask(dev, mask) do { } while (0) -#endif - int dma_set_mask(struct device *dev, u64 mask) { /* -- cgit From 22e4a348f87c59df2c02f1efb7ba9a56b622c7b8 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Fri, 12 May 2023 17:42:10 +0800 Subject: dma-contiguous: support per-numa CMA for all architectures In the commit b7176c261cdb ("dma-contiguous: provide the ability to reserve per-numa CMA"), Barry adds DMA_PERNUMA_CMA for ARM64. But this feature is architecture independent, so support per-numa CMA for all architectures, and enable it by default if NUMA. Signed-off-by: Yajun Deng Tested-by: Yicong Yang Signed-off-by: Christoph Hellwig --- Documentation/admin-guide/kernel-parameters.txt | 2 +- arch/arm64/mm/init.c | 2 -- include/linux/dma-map-ops.h | 6 ------ kernel/dma/Kconfig | 6 +++--- kernel/dma/contiguous.c | 8 +++++++- 5 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a1457995fd41..bdf0ab6716c8 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -696,7 +696,7 @@ kernel/dma/contiguous.c cma_pernuma=nn[MG] - [ARM64,KNL,CMA] + [KNL,CMA] Sets the size of kernel per-numa memory area for contiguous memory allocations. A value of 0 disables per-numa CMA altogether. And If this option is not diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d31c3a9290c5..527b0057434b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -434,8 +434,6 @@ void __init bootmem_init(void) arm64_hugetlb_cma_reserve(); #endif - dma_pernuma_cma_reserve(); - kvm_hyp_reserve(); /* diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index bb5e06fd359d..f2fc203fb8a1 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -169,12 +169,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, } #endif /* CONFIG_DMA_CMA*/ -#ifdef CONFIG_DMA_PERNUMA_CMA -void dma_pernuma_cma_reserve(void); -#else -static inline void dma_pernuma_cma_reserve(void) { } -#endif /* CONFIG_DMA_PERNUMA_CMA */ - #ifdef CONFIG_DMA_DECLARE_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size); diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 11d077003205..7afde9bc529f 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -147,10 +147,10 @@ if DMA_CMA config DMA_PERNUMA_CMA bool "Enable separate DMA Contiguous Memory Area for each NUMA Node" - default NUMA && ARM64 + default NUMA help - Enable this option to get pernuma CMA areas so that devices like - ARM64 SMMU can get local memory by DMA coherent APIs. + Enable this option to get pernuma CMA areas so that NUMA devices + can get local memory by DMA coherent APIs. You can set the size of pernuma CMA by specifying "cma_pernuma=size" on the kernel's command line. diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 6ea80ae42622..26a8e5365fcd 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -128,7 +128,7 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) #endif #ifdef CONFIG_DMA_PERNUMA_CMA -void __init dma_pernuma_cma_reserve(void) +static void __init dma_pernuma_cma_reserve(void) { int nid; @@ -153,6 +153,10 @@ void __init dma_pernuma_cma_reserve(void) (unsigned long long)pernuma_size_bytes / SZ_1M, nid); } } +#else +static inline void __init dma_pernuma_cma_reserve(void) +{ +} #endif /** @@ -171,6 +175,8 @@ void __init dma_contiguous_reserve(phys_addr_t limit) phys_addr_t selected_limit = limit; bool fixed = false; + dma_pernuma_cma_reserve(); + pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); if (size_cmdline != -1) { -- cgit From 05ee774122bd4a2f298668d6d5fc9e7b685a5e31 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:23:57 +0200 Subject: swiotlb: make io_tlb_default_mem local to swiotlb.c SWIOTLB implementation details should not be exposed to the rest of the kernel. This will allow to make changes to the implementation without modifying non-swiotlb code. To avoid breaking existing users, provide helper functions for the few required fields. As a bonus, using a helper function to initialize struct device allows to get rid of an #ifdef in driver core. Signed-off-by: Petr Tesarik Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig --- arch/mips/pci/pci-octeon.c | 2 +- drivers/base/core.c | 4 +--- drivers/xen/swiotlb-xen.c | 2 +- include/linux/swiotlb.h | 25 ++++++++++++++++++++++++- kernel/dma/swiotlb.c | 39 ++++++++++++++++++++++++++++++++++++++- mm/slab_common.c | 5 ++--- 6 files changed, 67 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index e457a18cbdc5..d19d9d456309 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -664,7 +664,7 @@ static int __init octeon_pci_setup(void) /* BAR1 movable regions contiguous to cover the swiotlb */ octeon_bar1_pci_phys = - io_tlb_default_mem.start & ~((1ull << 22) - 1); + default_swiotlb_base() & ~((1ull << 22) - 1); for (index = 0; index < 32; index++) { union cvmx_pci_bar1_indexx bar1_index; diff --git a/drivers/base/core.c b/drivers/base/core.c index 3dff5037943e..46d1d78c5beb 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3108,9 +3108,7 @@ void device_initialize(struct device *dev) defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) dev->dma_coherent = dma_default_coherent; #endif -#ifdef CONFIG_SWIOTLB - dev->dma_io_tlb_mem = &io_tlb_default_mem; -#endif + swiotlb_dev_init(dev); } EXPORT_SYMBOL_GPL(device_initialize); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 67aa74d20162..946bd56f0ac5 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -381,7 +381,7 @@ xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, static int xen_swiotlb_dma_supported(struct device *hwdev, u64 mask) { - return xen_phys_to_dma(hwdev, io_tlb_default_mem.end - 1) <= mask; + return xen_phys_to_dma(hwdev, default_swiotlb_limit()) <= mask; } const struct dma_map_ops xen_swiotlb_dma_ops = { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 4e52cd5e0bdc..2d453b3e7771 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -110,7 +110,6 @@ struct io_tlb_mem { atomic_long_t used_hiwater; #endif }; -extern struct io_tlb_mem io_tlb_default_mem; static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { @@ -128,13 +127,22 @@ static inline bool is_swiotlb_force_bounce(struct device *dev) void swiotlb_init(bool addressing_limited, unsigned int flags); void __init swiotlb_exit(void); +void swiotlb_dev_init(struct device *dev); size_t swiotlb_max_mapping_size(struct device *dev); +bool is_swiotlb_allocated(void); bool is_swiotlb_active(struct device *dev); void __init swiotlb_adjust_size(unsigned long size); +phys_addr_t default_swiotlb_base(void); +phys_addr_t default_swiotlb_limit(void); #else static inline void swiotlb_init(bool addressing_limited, unsigned int flags) { } + +static inline void swiotlb_dev_init(struct device *dev) +{ +} + static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { return false; @@ -151,6 +159,11 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev) return SIZE_MAX; } +static inline bool is_swiotlb_allocated(void) +{ + return false; +} + static inline bool is_swiotlb_active(struct device *dev) { return false; @@ -159,6 +172,16 @@ static inline bool is_swiotlb_active(struct device *dev) static inline void swiotlb_adjust_size(unsigned long size) { } + +static inline phys_addr_t default_swiotlb_base(void) +{ + return 0; +} + +static inline phys_addr_t default_swiotlb_limit(void) +{ + return 0; +} #endif /* CONFIG_SWIOTLB */ extern void swiotlb_print_info(void); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index ee57fd9949dc..0840bc15fb53 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -71,7 +71,7 @@ struct io_tlb_slot { static bool swiotlb_force_bounce; static bool swiotlb_force_disable; -struct io_tlb_mem io_tlb_default_mem; +static struct io_tlb_mem io_tlb_default_mem; static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT; static unsigned long default_nareas; @@ -489,6 +489,15 @@ void __init swiotlb_exit(void) memset(mem, 0, sizeof(*mem)); } +/** + * swiotlb_dev_init() - initialize swiotlb fields in &struct device + * @dev: Device to be initialized. + */ +void swiotlb_dev_init(struct device *dev) +{ + dev->dma_io_tlb_mem = &io_tlb_default_mem; +} + /* * Return the offset into a iotlb slot required to keep the device happy. */ @@ -953,6 +962,14 @@ size_t swiotlb_max_mapping_size(struct device *dev) return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align; } +/** + * is_swiotlb_allocated() - check if the default software IO TLB is initialized + */ +bool is_swiotlb_allocated(void) +{ + return io_tlb_default_mem.nslabs; +} + bool is_swiotlb_active(struct device *dev) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; @@ -960,6 +977,26 @@ bool is_swiotlb_active(struct device *dev) return mem && mem->nslabs; } +/** + * default_swiotlb_base() - get the base address of the default SWIOTLB + * + * Get the lowest physical address used by the default software IO TLB pool. + */ +phys_addr_t default_swiotlb_base(void) +{ + return io_tlb_default_mem.start; +} + +/** + * default_swiotlb_limit() - get the address limit of the default SWIOTLB + * + * Get the highest physical address used by the default software IO TLB pool. + */ +phys_addr_t default_swiotlb_limit(void) +{ + return io_tlb_default_mem.end - 1; +} + #ifdef CONFIG_DEBUG_FS static int io_tlb_used_get(void *data, u64 *val) diff --git a/mm/slab_common.c b/mm/slab_common.c index d1555ea2981a..c639dba9a3b7 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -864,10 +864,9 @@ void __init setup_kmalloc_cache_index_table(void) static unsigned int __kmalloc_minalign(void) { -#ifdef CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC - if (io_tlb_default_mem.nslabs) + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && + is_swiotlb_allocated()) return ARCH_KMALLOC_MINALIGN; -#endif return dma_get_cache_alignment(); } -- cgit From fea18777a78e845c6031128b40aaa2cf2cb46874 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:23:58 +0200 Subject: swiotlb: add documentation and rename swiotlb_do_find_slots() Add some kernel-doc comments and move the existing documentation of struct io_tlb_slot to its correct location. The latter was forgotten in commit 942a8186eb445 ("swiotlb: move struct io_tlb_slot to swiotlb.c"). Use the opportunity to give swiotlb_do_find_slots() a more descriptive name and make it clear how it differs from swiotlb_find_slots(). Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 15 ++++++++---- kernel/dma/swiotlb.c | 61 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 66 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 2d453b3e7771..31625ae507ea 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -76,10 +76,6 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and * @end. For default swiotlb, this is command line adjustable via * setup_io_tlb_npages. - * @list: The free list describing the number of free entries available - * from each index. - * @orig_addr: The original address corresponding to a mapped entry. - * @alloc_size: Size of the allocated buffer. * @debugfs: The dentry to debugfs. * @late_alloc: %true if allocated using the page allocator * @force_bounce: %true if swiotlb bouncing is forced @@ -111,6 +107,17 @@ struct io_tlb_mem { #endif }; +/** + * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb + * @dev: Device which has mapped the buffer. + * @paddr: Physical address within the DMA buffer. + * + * Check if @paddr points into a bounce buffer. + * + * Return: + * * %true if @paddr points into a bounce buffer + * * %false otherwise + */ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 0840bc15fb53..66793b59c290 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -62,6 +62,13 @@ #define INVALID_PHYS_ADDR (~(phys_addr_t)0) +/** + * struct io_tlb_slot - IO TLB slot descriptor + * @orig_addr: The original address corresponding to a mapped entry. + * @alloc_size: Size of the allocated buffer. + * @list: The free list describing the number of free entries available + * from each index. + */ struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; @@ -635,11 +642,22 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) } #endif /* CONFIG_DEBUG_FS */ -/* - * Find a suitable number of IO TLB entries size that will fit this request and - * allocate a buffer from that IO TLB pool. +/** + * swiotlb_area_find_slots() - search for slots in one IO TLB memory area + * @dev: Device which maps the buffer. + * @area_index: Index of the IO TLB memory area to be searched. + * @orig_addr: Original (non-bounced) IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * + * Find a suitable sequence of IO TLB entries for the request and allocate + * a buffer from the given IO TLB memory area. + * This function takes care of locking. + * + * Return: Index of the first allocated slot, or -1 on error. */ -static int swiotlb_do_find_slots(struct device *dev, int area_index, +static int swiotlb_area_find_slots(struct device *dev, int area_index, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask) { @@ -734,6 +752,19 @@ found: return slot_index; } +/** + * swiotlb_find_slots() - search for slots in the whole swiotlb + * @dev: Device which maps the buffer. + * @orig_addr: Original (non-bounced) IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * + * Search through the whole software IO TLB to find a sequence of slots that + * match the allocation constraints. + * + * Return: Index of the first allocated slot, or -1 on error. + */ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask) { @@ -742,8 +773,8 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, int i = start, index; do { - index = swiotlb_do_find_slots(dev, i, orig_addr, alloc_size, - alloc_align_mask); + index = swiotlb_area_find_slots(dev, i, orig_addr, alloc_size, + alloc_align_mask); if (index >= 0) return index; if (++i >= mem->nareas) @@ -755,6 +786,15 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, #ifdef CONFIG_DEBUG_FS +/** + * mem_used() - get number of used slots in an allocator + * @mem: Software IO TLB allocator. + * + * The result is accurate in this version of the function, because an atomic + * counter is available if CONFIG_DEBUG_FS is set. + * + * Return: Number of used slots. + */ static unsigned long mem_used(struct io_tlb_mem *mem) { return atomic_long_read(&mem->total_used); @@ -762,6 +802,15 @@ static unsigned long mem_used(struct io_tlb_mem *mem) #else /* !CONFIG_DEBUG_FS */ +/** + * mem_used() - get number of used slots in an allocator + * @mem: Software IO TLB allocator. + * + * The result is not accurate, because there is no locking of individual + * areas. + * + * Return: Approximate number of used slots. + */ static unsigned long mem_used(struct io_tlb_mem *mem) { int i; -- cgit From 158dbe9c9a3d36da824139e5304169326550c6c9 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:23:59 +0200 Subject: swiotlb: separate memory pool data from other allocator data Carve out memory pool specific fields from struct io_tlb_mem. The original struct now contains shared data for the whole allocator, while the new struct io_tlb_pool contains data that is specific to one memory pool of (potentially) many. Signed-off-by: Petr Tesarik Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig --- include/linux/device.h | 2 +- include/linux/swiotlb.h | 45 ++++++++----- kernel/dma/swiotlb.c | 175 ++++++++++++++++++++++++++++++------------------ 3 files changed, 140 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index bbaeabd04b0d..d9754a68ba95 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -625,7 +625,7 @@ struct device_physical_location { * @dma_pools: Dma pools (if dma'ble device). * @dma_mem: Internal for coherent mem override. * @cma_area: Contiguous memory area for dma allocations - * @dma_io_tlb_mem: Pointer to the swiotlb pool used. Not for driver use. + * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 31625ae507ea..247f0ab8795a 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -62,8 +62,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, #ifdef CONFIG_SWIOTLB /** - * struct io_tlb_mem - IO TLB Memory Pool Descriptor - * + * struct io_tlb_pool - IO TLB memory pool descriptor * @start: The start address of the swiotlb memory pool. Used to do a quick * range check to see if the memory was in fact allocated by this * API. @@ -73,15 +72,34 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * @vaddr: The vaddr of the swiotlb memory pool. The swiotlb memory pool * may be remapped in the memory encrypted case and store virtual * address for bounce buffer operation. - * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and - * @end. For default swiotlb, this is command line adjustable via - * setup_io_tlb_npages. + * @nslabs: The number of IO TLB slots between @start and @end. For the + * default swiotlb, this can be adjusted with a boot parameter, + * see setup_io_tlb_npages(). + * @late_alloc: %true if allocated using the page allocator. + * @nareas: Number of areas in the pool. + * @area_nslabs: Number of slots in each area. + * @areas: Array of memory area descriptors. + * @slots: Array of slot descriptors. + */ +struct io_tlb_pool { + phys_addr_t start; + phys_addr_t end; + void *vaddr; + unsigned long nslabs; + bool late_alloc; + unsigned int nareas; + unsigned int area_nslabs; + struct io_tlb_area *areas; + struct io_tlb_slot *slots; +}; + +/** + * struct io_tlb_mem - Software IO TLB allocator + * @defpool: Default (initial) IO TLB memory pool descriptor. + * @nslabs: Total number of IO TLB slabs in all pools. * @debugfs: The dentry to debugfs. - * @late_alloc: %true if allocated using the page allocator * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation - * @nareas: The area number in the pool. - * @area_nslabs: The slot number in the area. * @total_used: The total number of slots in the pool that are currently used * across all areas. Used only for calculating used_hiwater in * debugfs. @@ -89,18 +107,11 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * in debugfs. */ struct io_tlb_mem { - phys_addr_t start; - phys_addr_t end; - void *vaddr; + struct io_tlb_pool defpool; unsigned long nslabs; struct dentry *debugfs; - bool late_alloc; bool force_bounce; bool for_alloc; - unsigned int nareas; - unsigned int area_nslabs; - struct io_tlb_area *areas; - struct io_tlb_slot *slots; #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; atomic_long_t used_hiwater; @@ -122,7 +133,7 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - return mem && paddr >= mem->start && paddr < mem->end; + return mem && paddr >= mem->defpool.start && paddr < mem->defpool.end; } static inline bool is_swiotlb_force_bounce(struct device *dev) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 66793b59c290..6fc2606e014b 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -209,7 +209,7 @@ void __init swiotlb_adjust_size(unsigned long size) void swiotlb_print_info(void) { - struct io_tlb_mem *mem = &io_tlb_default_mem; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; if (!mem->nslabs) { pr_warn("No low mem\n"); @@ -238,7 +238,7 @@ static inline unsigned long nr_slots(u64 val) */ void __init swiotlb_update_mem_attributes(void) { - struct io_tlb_mem *mem = &io_tlb_default_mem; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long bytes; if (!mem->nslabs || mem->late_alloc) @@ -247,9 +247,8 @@ void __init swiotlb_update_mem_attributes(void) set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT); } -static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, - unsigned long nslabs, unsigned int flags, - bool late_alloc, unsigned int nareas) +static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, + unsigned long nslabs, bool late_alloc, unsigned int nareas) { void *vaddr = phys_to_virt(start); unsigned long bytes = nslabs << IO_TLB_SHIFT, i; @@ -261,8 +260,6 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, mem->nareas = nareas; mem->area_nslabs = nslabs / mem->nareas; - mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE); - for (i = 0; i < mem->nareas; i++) { spin_lock_init(&mem->areas[i].lock); mem->areas[i].index = 0; @@ -319,7 +316,7 @@ static void __init *swiotlb_memblock_alloc(unsigned long nslabs, void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)) { - struct io_tlb_mem *mem = &io_tlb_default_mem; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long nslabs; unsigned int nareas; size_t alloc_size; @@ -330,6 +327,9 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, if (swiotlb_force_disable) return; + io_tlb_default_mem.force_bounce = + swiotlb_force_bounce || (flags & SWIOTLB_FORCE); + if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); @@ -363,8 +363,9 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, return; } - swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false, - default_nareas); + swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, + default_nareas); + io_tlb_default_mem.nslabs = nslabs; if (flags & SWIOTLB_VERBOSE) swiotlb_print_info(); @@ -383,7 +384,7 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags) int swiotlb_init_late(size_t size, gfp_t gfp_mask, int (*remap)(void *tlb, unsigned long nslabs)) { - struct io_tlb_mem *mem = &io_tlb_default_mem; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); unsigned int nareas; unsigned char *vstart = NULL; @@ -397,6 +398,8 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, if (swiotlb_force_disable) return 0; + io_tlb_default_mem.force_bounce = swiotlb_force_bounce; + if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); @@ -448,8 +451,9 @@ retry: set_memory_decrypted((unsigned long)vstart, (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); - swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true, - nareas); + swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true, + nareas); + io_tlb_default_mem.nslabs = nslabs; swiotlb_print_info(); return 0; @@ -463,7 +467,7 @@ error_area: void __init swiotlb_exit(void) { - struct io_tlb_mem *mem = &io_tlb_default_mem; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long tbl_vaddr; size_t tbl_size, slots_size; unsigned int area_order; @@ -519,7 +523,7 @@ static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *mem = &dev->dma_io_tlb_mem->defpool; int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; @@ -601,7 +605,7 @@ static inline unsigned long get_max_slots(unsigned long boundary_mask) return nr_slots(boundary_mask + 1); } -static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index) +static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index) { if (index >= mem->area_nslabs) return 0; @@ -645,6 +649,7 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) /** * swiotlb_area_find_slots() - search for slots in one IO TLB memory area * @dev: Device which maps the buffer. + * @pool: Memory pool to be searched. * @area_index: Index of the IO TLB memory area to be searched. * @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, @@ -657,15 +662,14 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) * * Return: Index of the first allocated slot, or -1 on error. */ -static int swiotlb_area_find_slots(struct device *dev, int area_index, - phys_addr_t orig_addr, size_t alloc_size, +static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, + int area_index, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - struct io_tlb_area *area = mem->areas + area_index; + struct io_tlb_area *area = pool->areas + area_index; unsigned long boundary_mask = dma_get_seg_boundary(dev); dma_addr_t tbl_dma_addr = - phys_to_dma_unencrypted(dev, mem->start) & boundary_mask; + phys_to_dma_unencrypted(dev, pool->start) & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); unsigned int iotlb_align_mask = dma_get_min_align_mask(dev) | alloc_align_mask; @@ -677,7 +681,7 @@ static int swiotlb_area_find_slots(struct device *dev, int area_index, unsigned int slot_index; BUG_ON(!nslots); - BUG_ON(area_index >= mem->nareas); + BUG_ON(area_index >= pool->nareas); /* * For allocations of PAGE_SIZE or larger only look for page aligned @@ -694,19 +698,19 @@ static int swiotlb_area_find_slots(struct device *dev, int area_index, stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; spin_lock_irqsave(&area->lock, flags); - if (unlikely(nslots > mem->area_nslabs - area->used)) + if (unlikely(nslots > pool->area_nslabs - area->used)) goto not_found; - slot_base = area_index * mem->area_nslabs; + slot_base = area_index * pool->area_nslabs; index = area->index; - for (slots_checked = 0; slots_checked < mem->area_nslabs; ) { + for (slots_checked = 0; slots_checked < pool->area_nslabs; ) { slot_index = slot_base + index; if (orig_addr && (slot_addr(tbl_dma_addr, slot_index) & iotlb_align_mask) != (orig_addr & iotlb_align_mask)) { - index = wrap_area_index(mem, index + 1); + index = wrap_area_index(pool, index + 1); slots_checked++; continue; } @@ -719,10 +723,10 @@ static int swiotlb_area_find_slots(struct device *dev, int area_index, if (!iommu_is_span_boundary(slot_index, nslots, nr_slots(tbl_dma_addr), max_slots)) { - if (mem->slots[slot_index].list >= nslots) + if (pool->slots[slot_index].list >= nslots) goto found; } - index = wrap_area_index(mem, index + stride); + index = wrap_area_index(pool, index + stride); slots_checked += stride; } @@ -732,58 +736,79 @@ not_found: found: for (i = slot_index; i < slot_index + nslots; i++) { - mem->slots[i].list = 0; - mem->slots[i].alloc_size = alloc_size - (offset + + pool->slots[i].list = 0; + pool->slots[i].alloc_size = alloc_size - (offset + ((i - slot_index) << IO_TLB_SHIFT)); } for (i = slot_index - 1; io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && - mem->slots[i].list; i--) - mem->slots[i].list = ++count; + pool->slots[i].list; i--) + pool->slots[i].list = ++count; /* * Update the indices to avoid searching in the next round. */ - area->index = wrap_area_index(mem, index + nslots); + area->index = wrap_area_index(pool, index + nslots); area->used += nslots; spin_unlock_irqrestore(&area->lock, flags); - inc_used_and_hiwater(mem, nslots); + inc_used_and_hiwater(dev->dma_io_tlb_mem, nslots); return slot_index; } /** - * swiotlb_find_slots() - search for slots in the whole swiotlb + * swiotlb_pool_find_slots() - search for slots in one memory pool * @dev: Device which maps the buffer. + * @pool: Memory pool to be searched. * @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. * - * Search through the whole software IO TLB to find a sequence of slots that - * match the allocation constraints. + * Search through one memory pool to find a sequence of slots that match the + * allocation constraints. * * Return: Index of the first allocated slot, or -1 on error. */ -static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, - size_t alloc_size, unsigned int alloc_align_mask) +static int swiotlb_pool_find_slots(struct device *dev, struct io_tlb_pool *pool, + phys_addr_t orig_addr, size_t alloc_size, + unsigned int alloc_align_mask) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - int start = raw_smp_processor_id() & (mem->nareas - 1); + int start = raw_smp_processor_id() & (pool->nareas - 1); int i = start, index; do { - index = swiotlb_area_find_slots(dev, i, orig_addr, alloc_size, - alloc_align_mask); + index = swiotlb_area_find_slots(dev, pool, i, orig_addr, + alloc_size, alloc_align_mask); if (index >= 0) return index; - if (++i >= mem->nareas) + if (++i >= pool->nareas) i = 0; } while (i != start); return -1; } +/** + * swiotlb_find_slots() - search for slots in the whole swiotlb + * @dev: Device which maps the buffer. + * @orig_addr: Original (non-bounced) IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * + * Search through the whole software IO TLB to find a sequence of slots that + * match the allocation constraints. + * + * Return: Index of the first allocated slot, or -1 on error. + */ +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, + size_t alloc_size, unsigned int alloc_align_mask) +{ + return swiotlb_pool_find_slots(dev, &dev->dma_io_tlb_mem->defpool, + orig_addr, alloc_size, alloc_align_mask); +} + #ifdef CONFIG_DEBUG_FS /** @@ -802,6 +827,24 @@ static unsigned long mem_used(struct io_tlb_mem *mem) #else /* !CONFIG_DEBUG_FS */ +/** + * mem_pool_used() - get number of used slots in a memory pool + * @pool: Software IO TLB memory pool. + * + * The result is not accurate, see mem_used(). + * + * Return: Approximate number of used slots. + */ +static unsigned long mem_pool_used(struct io_tlb_pool *pool) +{ + int i; + unsigned long used = 0; + + for (i = 0; i < pool->nareas; i++) + used += pool->areas[i].used; + return used; +} + /** * mem_used() - get number of used slots in an allocator * @mem: Software IO TLB allocator. @@ -813,12 +856,7 @@ static unsigned long mem_used(struct io_tlb_mem *mem) */ static unsigned long mem_used(struct io_tlb_mem *mem) { - int i; - unsigned long used = 0; - - for (i = 0; i < mem->nareas; i++) - used += mem->areas[i].used; - return used; + return mem_pool_used(&mem->defpool); } #endif /* CONFIG_DEBUG_FS */ @@ -830,6 +868,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned int offset = swiotlb_align_offset(dev, orig_addr); + struct io_tlb_pool *pool; unsigned int i; int index; phys_addr_t tlb_addr; @@ -864,9 +903,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * This is needed when we sync the memory. Then we sync the buffer if * needed. */ + pool = &mem->defpool; for (i = 0; i < nr_slots(alloc_size + offset); i++) - mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); - tlb_addr = slot_addr(mem->start, index) + offset; + pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); + tlb_addr = slot_addr(pool->start, index) + offset; /* * When dir == DMA_FROM_DEVICE we could omit the copy from the orig * to the tlb buffer, if we knew for sure the device will @@ -880,7 +920,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *mem = &dev->dma_io_tlb_mem->defpool; unsigned long flags; unsigned int offset = swiotlb_align_offset(dev, tlb_addr); int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; @@ -924,7 +964,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) area->used -= nslots; spin_unlock_irqrestore(&area->lock, flags); - dec_used(mem, nslots); + dec_used(dev->dma_io_tlb_mem, nslots); } /* @@ -1033,7 +1073,7 @@ bool is_swiotlb_active(struct device *dev) */ phys_addr_t default_swiotlb_base(void) { - return io_tlb_default_mem.start; + return io_tlb_default_mem.defpool.start; } /** @@ -1043,7 +1083,7 @@ phys_addr_t default_swiotlb_base(void) */ phys_addr_t default_swiotlb_limit(void) { - return io_tlb_default_mem.end - 1; + return io_tlb_default_mem.defpool.end - 1; } #ifdef CONFIG_DEBUG_FS @@ -1119,6 +1159,7 @@ static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, struct page *swiotlb_alloc(struct device *dev, size_t size) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; phys_addr_t tlb_addr; int index; @@ -1129,7 +1170,8 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) if (index == -1) return NULL; - tlb_addr = slot_addr(mem->start, index); + pool = &mem->defpool; + tlb_addr = slot_addr(pool->start, index); return pfn_to_page(PFN_DOWN(tlb_addr)); } @@ -1166,29 +1208,34 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, * to it. */ if (!mem) { + struct io_tlb_pool *pool; + mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return -ENOMEM; + pool = &mem->defpool; - mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL); - if (!mem->slots) { + pool->slots = kcalloc(nslabs, sizeof(*pool->slots), GFP_KERNEL); + if (!pool->slots) { kfree(mem); return -ENOMEM; } - mem->areas = kcalloc(nareas, sizeof(*mem->areas), + pool->areas = kcalloc(nareas, sizeof(*pool->areas), GFP_KERNEL); - if (!mem->areas) { - kfree(mem->slots); + if (!pool->areas) { + kfree(pool->slots); kfree(mem); return -ENOMEM; } set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), rmem->size >> PAGE_SHIFT); - swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE, - false, nareas); + swiotlb_init_io_tlb_pool(pool, rmem->base, nslabs, + false, nareas); + mem->force_bounce = true; mem->for_alloc = true; + mem->nslabs = nslabs; rmem->priv = mem; -- cgit From 62708b2ba4055cad43a95754e939566b56dde5b6 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:24:00 +0200 Subject: swiotlb: add a flag whether SWIOTLB is allowed to grow Add a config option (CONFIG_SWIOTLB_DYNAMIC) to enable or disable dynamic allocation of additional bounce buffers. If this option is set, mark the default SWIOTLB as able to grow and restricted DMA pools as unable. However, if the address of the default memory pool is explicitly queried, make the default SWIOTLB also unable to grow. This is currently used to set up PCI BAR movable regions on some Octeon MIPS boards which may not be able to use a SWIOTLB pool elsewhere in physical memory. See octeon_pci_setup() for more details. If a remap function is specified, it must be also called on any dynamically allocated pools, but there are some issues: - The remap function may block, so it should not be called from an atomic context. - There is no corresponding unremap() function if the memory pool is freed. - The only in-tree implementation (xen_swiotlb_fixup) requires that the number of slots in the memory pool is a multiple of SWIOTLB_SEGSIZE. Keep it simple for now and disable growing the SWIOTLB if a remap function was specified. Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 4 ++++ kernel/dma/Kconfig | 13 +++++++++++++ kernel/dma/swiotlb.c | 13 +++++++++++++ 3 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 247f0ab8795a..57be2a0a9fbf 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -100,6 +100,7 @@ struct io_tlb_pool { * @debugfs: The dentry to debugfs. * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation + * @can_grow: %true if more pools can be allocated dynamically. * @total_used: The total number of slots in the pool that are currently used * across all areas. Used only for calculating used_hiwater in * debugfs. @@ -112,6 +113,9 @@ struct io_tlb_mem { struct dentry *debugfs; bool force_bounce; bool for_alloc; +#ifdef CONFIG_SWIOTLB_DYNAMIC + bool can_grow; +#endif #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; atomic_long_t used_hiwater; diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 562463fe30ea..4c1e9a3c0ab6 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -90,6 +90,19 @@ config SWIOTLB bool select NEED_DMA_MAP_STATE +config SWIOTLB_DYNAMIC + bool "Dynamic allocation of DMA bounce buffers" + default n + depends on SWIOTLB + help + This enables dynamic resizing of the software IO TLB. The kernel + starts with one memory pool at boot and it will allocate additional + pools as needed. To reduce run-time kernel memory requirements, you + may have to specify a smaller size of the initial pool using + "swiotlb=" on the kernel command line. + + If unsure, say N. + config DMA_BOUNCE_UNALIGNED_KMALLOC bool depends on SWIOTLB diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 6fc2606e014b..767c8fb36a6b 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -330,6 +330,11 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, io_tlb_default_mem.force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE); +#ifdef CONFIG_SWIOTLB_DYNAMIC + if (!remap) + io_tlb_default_mem.can_grow = true; +#endif + if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); @@ -400,6 +405,11 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, io_tlb_default_mem.force_bounce = swiotlb_force_bounce; +#ifdef CONFIG_SWIOTLB_DYNAMIC + if (!remap) + io_tlb_default_mem.can_grow = true; +#endif + if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); @@ -1073,6 +1083,9 @@ bool is_swiotlb_active(struct device *dev) */ phys_addr_t default_swiotlb_base(void) { +#ifdef CONFIG_SWIOTLB_DYNAMIC + io_tlb_default_mem.can_grow = false; +#endif return io_tlb_default_mem.defpool.start; } -- cgit From 79636caad3618e2b38457f6e298c9b31ba82b489 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:24:01 +0200 Subject: swiotlb: if swiotlb is full, fall back to a transient memory pool Try to allocate a transient memory pool if no suitable slots can be found and the respective SWIOTLB is allowed to grow. The transient pool is just enough big for this one bounce buffer. It is inserted into a per-device list of transient memory pools, and it is freed again when the bounce buffer is unmapped. Transient memory pools are kept in an RCU list. A memory barrier is required after adding a new entry, because any address within a transient buffer must be immediately recognized as belonging to the SWIOTLB, even if it is passed to another CPU. Deletion does not require any synchronization beyond RCU ordering guarantees. After a buffer is unmapped, its physical addresses may no longer be passed to the DMA API, so the memory range of the corresponding stale entry in the RCU list never matches. If the memory range gets allocated again, then it happens only after a RCU quiescent state. Since bounce buffers can now be allocated from different pools, add a parameter to swiotlb_alloc_pool() to let the caller know which memory pool is used. Add swiotlb_find_pool() to find the memory pool corresponding to an address. This function is now also used by is_swiotlb_buffer(), because a simple boundary check is no longer sufficient. The logic in swiotlb_alloc_tlb() is taken from __dma_direct_alloc_pages(), simplified and enhanced to use coherent memory pools if needed. Note that this is not the most efficient way to provide a bounce buffer, but when a DMA buffer can't be mapped, something may (and will) actually break. At that point it is better to make an allocation, even if it may be an expensive operation. Signed-off-by: Petr Tesarik Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig --- include/linux/device.h | 6 + include/linux/dma-mapping.h | 2 + include/linux/swiotlb.h | 29 +++- kernel/dma/direct.c | 2 +- kernel/dma/swiotlb.c | 316 ++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 345 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index d9754a68ba95..5fd89c9d005c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -626,6 +626,8 @@ struct device_physical_location { * @dma_mem: Internal for coherent mem override. * @cma_area: Contiguous memory area for dma allocations * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. + * @dma_io_tlb_pools: List of transient swiotlb memory pools. + * @dma_io_tlb_lock: Protects changes to the list of active pools. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -731,6 +733,10 @@ struct device { #endif #ifdef CONFIG_SWIOTLB struct io_tlb_mem *dma_io_tlb_mem; +#endif +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct list_head dma_io_tlb_pools; + spinlock_t dma_io_tlb_lock; #endif /* arch specific additions */ struct dev_archdata archdata; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index e13050eb9777..f0ccca16a0ac 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -418,6 +418,8 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) +bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); + static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 57be2a0a9fbf..66867d2188ba 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -80,6 +80,9 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * @area_nslabs: Number of slots in each area. * @areas: Array of memory area descriptors. * @slots: Array of slot descriptors. + * @node: Member of the IO TLB memory pool list. + * @rcu: RCU head for swiotlb_dyn_free(). + * @transient: %true if transient memory pool. */ struct io_tlb_pool { phys_addr_t start; @@ -91,6 +94,11 @@ struct io_tlb_pool { unsigned int area_nslabs; struct io_tlb_area *areas; struct io_tlb_slot *slots; +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct list_head node; + struct rcu_head rcu; + bool transient; +#endif }; /** @@ -122,6 +130,20 @@ struct io_tlb_mem { #endif }; +#ifdef CONFIG_SWIOTLB_DYNAMIC + +struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr); + +#else + +static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, + phys_addr_t paddr) +{ + return &dev->dma_io_tlb_mem->defpool; +} + +#endif + /** * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb * @dev: Device which has mapped the buffer. @@ -137,7 +159,12 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - return mem && paddr >= mem->defpool.start && paddr < mem->defpool.end; + if (!mem) + return false; + + if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) + return swiotlb_find_pool(dev, paddr); + return paddr >= mem->defpool.start && paddr < mem->defpool.end; } static inline bool is_swiotlb_force_bounce(struct device *dev) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index d29cade048db..9596ae1aa0da 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -66,7 +66,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit) return 0; } -static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) +bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { dma_addr_t dma_addr = phys_to_dma_direct(dev, phys); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 767c8fb36a6b..30d0fcc3ccb9 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -510,6 +511,211 @@ void __init swiotlb_exit(void) memset(mem, 0, sizeof(*mem)); } +#ifdef CONFIG_SWIOTLB_DYNAMIC + +/** + * alloc_dma_pages() - allocate pages to be used for DMA + * @gfp: GFP flags for the allocation. + * @bytes: Size of the buffer. + * + * Allocate pages from the buddy allocator. If successful, make the allocated + * pages decrypted that they can be used for DMA. + * + * Return: Decrypted pages, or %NULL on failure. + */ +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes) +{ + unsigned int order = get_order(bytes); + struct page *page; + void *vaddr; + + page = alloc_pages(gfp, order); + if (!page) + return NULL; + + vaddr = page_address(page); + if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) + goto error; + return page; + +error: + __free_pages(page, order); + return NULL; +} + +/** + * swiotlb_alloc_tlb() - allocate a dynamic IO TLB buffer + * @dev: Device for which a memory pool is allocated. + * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. + * @gfp: GFP flags for the allocation. + * + * Return: Allocated pages, or %NULL on allocation failure. + */ +static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, + u64 phys_limit, gfp_t gfp) +{ + struct page *page; + + /* + * Allocate from the atomic pools if memory is encrypted and + * the allocation is atomic, because decrypting may block. + */ + if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) { + void *vaddr; + + if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) + return NULL; + + return dma_alloc_from_pool(dev, bytes, &vaddr, gfp, + dma_coherent_ok); + } + + gfp &= ~GFP_ZONEMASK; + if (phys_limit <= DMA_BIT_MASK(zone_dma_bits)) + gfp |= __GFP_DMA; + else if (phys_limit <= DMA_BIT_MASK(32)) + gfp |= __GFP_DMA32; + + while ((page = alloc_dma_pages(gfp, bytes)) && + page_to_phys(page) + bytes - 1 > phys_limit) { + /* allocated, but too high */ + __free_pages(page, get_order(bytes)); + + if (IS_ENABLED(CONFIG_ZONE_DMA32) && + phys_limit < DMA_BIT_MASK(64) && + !(gfp & (__GFP_DMA32 | __GFP_DMA))) + gfp |= __GFP_DMA32; + else if (IS_ENABLED(CONFIG_ZONE_DMA) && + !(gfp & __GFP_DMA)) + gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA; + else + return NULL; + } + + return page; +} + +/** + * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer + * @vaddr: Virtual address of the buffer. + * @bytes: Size of the buffer. + */ +static void swiotlb_free_tlb(void *vaddr, size_t bytes) +{ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + dma_free_from_pool(NULL, vaddr, bytes)) + return; + + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(virt_to_page(vaddr), get_order(bytes)); +} + +/** + * swiotlb_alloc_pool() - allocate a new IO TLB memory pool + * @dev: Device for which a memory pool is allocated. + * @nslabs: Desired number of slabs. + * @phys_limit: Maximum DMA buffer physical address. + * @gfp: GFP flags for the allocations. + * + * Allocate and initialize a new IO TLB memory pool. + * + * Return: New memory pool, or %NULL on allocation failure. + */ +static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev, + unsigned int nslabs, u64 phys_limit, gfp_t gfp) +{ + struct io_tlb_pool *pool; + struct page *tlb; + size_t pool_size; + size_t tlb_size; + + pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), 1) + + array_size(sizeof(*pool->slots), nslabs); + pool = kzalloc(pool_size, gfp); + if (!pool) + goto error; + pool->areas = (void *)pool + sizeof(*pool); + pool->slots = (void *)pool->areas + sizeof(*pool->areas); + + tlb_size = nslabs << IO_TLB_SHIFT; + tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp); + if (!tlb) + goto error_tlb; + + swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, 1); + return pool; + +error_tlb: + kfree(pool); +error: + return NULL; +} + +/** + * swiotlb_dyn_free() - RCU callback to free a memory pool + * @rcu: RCU head in the corresponding struct io_tlb_pool. + */ +static void swiotlb_dyn_free(struct rcu_head *rcu) +{ + struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu); + size_t tlb_size = pool->end - pool->start; + + swiotlb_free_tlb(pool->vaddr, tlb_size); + kfree(pool); +} + +/** + * swiotlb_find_pool() - find the IO TLB pool for a physical address + * @dev: Device which has mapped the DMA buffer. + * @paddr: Physical address within the DMA buffer. + * + * Find the IO TLB memory pool descriptor which contains the given physical + * address, if any. + * + * Return: Memory pool which contains @paddr, or %NULL if none. + */ +struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool = &mem->defpool; + + if (paddr >= pool->start && paddr < pool->end) + return pool; + + /* Pairs with smp_wmb() in swiotlb_find_slots(). */ + smp_rmb(); + + rcu_read_lock(); + list_for_each_entry_rcu(pool, &dev->dma_io_tlb_pools, node) { + if (paddr >= pool->start && paddr < pool->end) + goto out; + } + pool = NULL; +out: + rcu_read_unlock(); + return pool; +} + +/** + * swiotlb_del_pool() - remove an IO TLB pool from a device + * @dev: Owning device. + * @pool: Memory pool to be removed. + */ +static void swiotlb_del_pool(struct device *dev, struct io_tlb_pool *pool) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); + list_del_rcu(&pool->node); + spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); + + call_rcu(&pool->rcu, swiotlb_dyn_free); +} + +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + /** * swiotlb_dev_init() - initialize swiotlb fields in &struct device * @dev: Device to be initialized. @@ -517,6 +723,10 @@ void __init swiotlb_exit(void) void swiotlb_dev_init(struct device *dev) { dev->dma_io_tlb_mem = &io_tlb_default_mem; +#ifdef CONFIG_SWIOTLB_DYNAMIC + INIT_LIST_HEAD(&dev->dma_io_tlb_pools); + spin_lock_init(&dev->dma_io_tlb_lock); +#endif } /* @@ -533,7 +743,7 @@ static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir) { - struct io_tlb_pool *mem = &dev->dma_io_tlb_mem->defpool; + struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; @@ -799,6 +1009,8 @@ static int swiotlb_pool_find_slots(struct device *dev, struct io_tlb_pool *pool, return -1; } +#ifdef CONFIG_SWIOTLB_DYNAMIC + /** * swiotlb_find_slots() - search for slots in the whole swiotlb * @dev: Device which maps the buffer. @@ -806,6 +1018,7 @@ static int swiotlb_pool_find_slots(struct device *dev, struct io_tlb_pool *pool, * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. + * @retpool: Used memory pool, updated on return. * * Search through the whole software IO TLB to find a sequence of slots that * match the allocation constraints. @@ -813,12 +1026,64 @@ static int swiotlb_pool_find_slots(struct device *dev, struct io_tlb_pool *pool, * Return: Index of the first allocated slot, or -1 on error. */ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, - size_t alloc_size, unsigned int alloc_align_mask) + size_t alloc_size, unsigned int alloc_align_mask, + struct io_tlb_pool **retpool) { - return swiotlb_pool_find_slots(dev, &dev->dma_io_tlb_mem->defpool, + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; + unsigned long nslabs; + unsigned long flags; + u64 phys_limit; + int index; + + pool = &mem->defpool; + index = swiotlb_pool_find_slots(dev, pool, orig_addr, + alloc_size, alloc_align_mask); + if (index >= 0) + goto found; + + if (!mem->can_grow) + return -1; + + nslabs = nr_slots(alloc_size); + phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit); + pool = swiotlb_alloc_pool(dev, nslabs, phys_limit, + GFP_NOWAIT | __GFP_NOWARN); + if (!pool) + return -1; + + index = swiotlb_pool_find_slots(dev, pool, orig_addr, + alloc_size, alloc_align_mask); + if (index < 0) { + swiotlb_dyn_free(&pool->rcu); + return -1; + } + + pool->transient = true; + spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); + list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); + spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); + + /* Pairs with smp_rmb() in swiotlb_find_pool(). */ + smp_wmb(); +found: + *retpool = pool; + return index; +} + +#else /* !CONFIG_SWIOTLB_DYNAMIC */ + +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, + size_t alloc_size, unsigned int alloc_align_mask, + struct io_tlb_pool **retpool) +{ + *retpool = &dev->dma_io_tlb_mem->defpool; + return swiotlb_pool_find_slots(dev, *retpool, orig_addr, alloc_size, alloc_align_mask); } +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + #ifdef CONFIG_DEBUG_FS /** @@ -899,7 +1164,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, } index = swiotlb_find_slots(dev, orig_addr, - alloc_size + offset, alloc_align_mask); + alloc_size + offset, alloc_align_mask, &pool); if (index == -1) { if (!(attrs & DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, @@ -913,7 +1178,6 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * This is needed when we sync the memory. Then we sync the buffer if * needed. */ - pool = &mem->defpool; for (i = 0; i < nr_slots(alloc_size + offset); i++) pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(pool->start, index) + offset; @@ -930,7 +1194,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) { - struct io_tlb_pool *mem = &dev->dma_io_tlb_mem->defpool; + struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); unsigned long flags; unsigned int offset = swiotlb_align_offset(dev, tlb_addr); int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; @@ -977,6 +1241,41 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) dec_used(dev->dma_io_tlb_mem, nslots); } +#ifdef CONFIG_SWIOTLB_DYNAMIC + +/** + * swiotlb_del_transient() - delete a transient memory pool + * @dev: Device which mapped the buffer. + * @tlb_addr: Physical address within a bounce buffer. + * + * Check whether the address belongs to a transient SWIOTLB memory pool. + * If yes, then delete the pool. + * + * Return: %true if @tlb_addr belonged to a transient pool that was released. + */ +static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) +{ + struct io_tlb_pool *pool; + + pool = swiotlb_find_pool(dev, tlb_addr); + if (!pool->transient) + return false; + + dec_used(dev->dma_io_tlb_mem, pool->nslabs); + swiotlb_del_pool(dev, pool); + return true; +} + +#else /* !CONFIG_SWIOTLB_DYNAMIC */ + +static inline bool swiotlb_del_transient(struct device *dev, + phys_addr_t tlb_addr) +{ + return false; +} + +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + /* * tlb_addr is the physical address of the bounce buffer to unmap. */ @@ -991,6 +1290,8 @@ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE); + if (swiotlb_del_transient(dev, tlb_addr)) + return; swiotlb_release_slots(dev, tlb_addr); } @@ -1179,11 +1480,10 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) if (!mem) return NULL; - index = swiotlb_find_slots(dev, 0, size, 0); + index = swiotlb_find_slots(dev, 0, size, 0, &pool); if (index == -1) return NULL; - pool = &mem->defpool; tlb_addr = slot_addr(pool->start, index); return pfn_to_page(PFN_DOWN(tlb_addr)); -- cgit From ad96ce3252dbab773cb343220662df3d84dd8e80 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:24:02 +0200 Subject: swiotlb: determine potential physical address limit The value returned by default_swiotlb_limit() should be constant, because it is used to decide whether DMA can be used. To allow allocating memory pools on the fly, use the maximum possible physical address rather than the highest address used by the default pool. For swiotlb_init_remap(), this is either an arch-specific limit used by memblock_alloc_low(), or the highest directly mapped physical address if the initialization flags include SWIOTLB_ANY. For swiotlb_init_late(), the highest address is determined by the GFP flags. Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 2 ++ kernel/dma/swiotlb.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 66867d2188ba..9825fa14abe4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -109,6 +109,7 @@ struct io_tlb_pool { * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation * @can_grow: %true if more pools can be allocated dynamically. + * @phys_limit: Maximum allowed physical address. * @total_used: The total number of slots in the pool that are currently used * across all areas. Used only for calculating used_hiwater in * debugfs. @@ -123,6 +124,7 @@ struct io_tlb_mem { bool for_alloc; #ifdef CONFIG_SWIOTLB_DYNAMIC bool can_grow; + u64 phys_limit; #endif #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 30d0fcc3ccb9..0fa081defdbd 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -334,6 +334,10 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, #ifdef CONFIG_SWIOTLB_DYNAMIC if (!remap) io_tlb_default_mem.can_grow = true; + if (flags & SWIOTLB_ANY) + io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); + else + io_tlb_default_mem.phys_limit = ARCH_LOW_ADDRESS_LIMIT; #endif if (!default_nareas) @@ -409,6 +413,12 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, #ifdef CONFIG_SWIOTLB_DYNAMIC if (!remap) io_tlb_default_mem.can_grow = true; + if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA)) + io_tlb_default_mem.phys_limit = DMA_BIT_MASK(zone_dma_bits); + else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32)) + io_tlb_default_mem.phys_limit = DMA_BIT_MASK(32); + else + io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); #endif if (!default_nareas) @@ -1397,7 +1407,11 @@ phys_addr_t default_swiotlb_base(void) */ phys_addr_t default_swiotlb_limit(void) { +#ifdef CONFIG_SWIOTLB_DYNAMIC + return io_tlb_default_mem.phys_limit; +#else return io_tlb_default_mem.defpool.end - 1; +#endif } #ifdef CONFIG_DEBUG_FS -- cgit From 1aaa736815eb04f4dae3f0b3e977b2a0677a4cfb Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:24:03 +0200 Subject: swiotlb: allocate a new memory pool when existing pools are full When swiotlb_find_slots() cannot find suitable slots, schedule the allocation of a new memory pool. It is not possible to allocate the pool immediately, because this code may run in interrupt context, which is not suitable for large memory allocations. This means that the memory pool will be available too late for the currently requested mapping, but the stress on the software IO TLB allocator is likely to continue, and subsequent allocations will benefit from the additional pool eventually. Keep all memory pools for an allocator in an RCU list to avoid locking on the read side. For modifications, add a new spinlock to struct io_tlb_mem. The spinlock also protects updates to the total number of slabs (nslabs in struct io_tlb_mem), but not reads of the value. Readers may therefore encounter a stale value, but this is not an issue: - swiotlb_tbl_map_single() and is_swiotlb_active() only check for non-zero value. This is ensured by the existence of the default memory pool, allocated at boot. - The exact value is used only for non-critical purposes (debugfs, kernel messages). Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 8 +++ kernel/dma/swiotlb.c | 148 ++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 131 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 9825fa14abe4..8371c92a0271 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -8,6 +8,7 @@ #include #include #include +#include struct device; struct page; @@ -104,12 +105,16 @@ struct io_tlb_pool { /** * struct io_tlb_mem - Software IO TLB allocator * @defpool: Default (initial) IO TLB memory pool descriptor. + * @pool: IO TLB memory pool descriptor (if not dynamic). * @nslabs: Total number of IO TLB slabs in all pools. * @debugfs: The dentry to debugfs. * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation * @can_grow: %true if more pools can be allocated dynamically. * @phys_limit: Maximum allowed physical address. + * @lock: Lock to synchronize changes to the list. + * @pools: List of IO TLB memory pool descriptors (if dynamic). + * @dyn_alloc: Dynamic IO TLB pool allocation work. * @total_used: The total number of slots in the pool that are currently used * across all areas. Used only for calculating used_hiwater in * debugfs. @@ -125,6 +130,9 @@ struct io_tlb_mem { #ifdef CONFIG_SWIOTLB_DYNAMIC bool can_grow; u64 phys_limit; + spinlock_t lock; + struct list_head pools; + struct work_struct dyn_alloc; #endif #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 0fa081defdbd..adf80dec42d7 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -79,8 +79,23 @@ struct io_tlb_slot { static bool swiotlb_force_bounce; static bool swiotlb_force_disable; +#ifdef CONFIG_SWIOTLB_DYNAMIC + +static void swiotlb_dyn_alloc(struct work_struct *work); + +static struct io_tlb_mem io_tlb_default_mem = { + .lock = __SPIN_LOCK_UNLOCKED(io_tlb_default_mem.lock), + .pools = LIST_HEAD_INIT(io_tlb_default_mem.pools), + .dyn_alloc = __WORK_INITIALIZER(io_tlb_default_mem.dyn_alloc, + swiotlb_dyn_alloc), +}; + +#else /* !CONFIG_SWIOTLB_DYNAMIC */ + static struct io_tlb_mem io_tlb_default_mem; +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT; static unsigned long default_nareas; @@ -278,6 +293,23 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, return; } +/** + * add_mem_pool() - add a memory pool to the allocator + * @mem: Software IO TLB allocator. + * @pool: Memory pool to be added. + */ +static void add_mem_pool(struct io_tlb_mem *mem, struct io_tlb_pool *pool) +{ +#ifdef CONFIG_SWIOTLB_DYNAMIC + spin_lock(&mem->lock); + list_add_rcu(&pool->node, &mem->pools); + mem->nslabs += pool->nslabs; + spin_unlock(&mem->lock); +#else + mem->nslabs = pool->nslabs; +#endif +} + static void __init *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)) @@ -375,7 +407,7 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, default_nareas); - io_tlb_default_mem.nslabs = nslabs; + add_mem_pool(&io_tlb_default_mem, mem); if (flags & SWIOTLB_VERBOSE) swiotlb_print_info(); @@ -474,7 +506,7 @@ retry: (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true, nareas); - io_tlb_default_mem.nslabs = nslabs; + add_mem_pool(&io_tlb_default_mem, mem); swiotlb_print_info(); return 0; @@ -625,44 +657,83 @@ static void swiotlb_free_tlb(void *vaddr, size_t bytes) /** * swiotlb_alloc_pool() - allocate a new IO TLB memory pool * @dev: Device for which a memory pool is allocated. - * @nslabs: Desired number of slabs. + * @minslabs: Minimum number of slabs. + * @nslabs: Desired (maximum) number of slabs. + * @nareas: Number of areas. * @phys_limit: Maximum DMA buffer physical address. * @gfp: GFP flags for the allocations. * - * Allocate and initialize a new IO TLB memory pool. + * Allocate and initialize a new IO TLB memory pool. The actual number of + * slabs may be reduced if allocation of @nslabs fails. If even + * @minslabs cannot be allocated, this function fails. * * Return: New memory pool, or %NULL on allocation failure. */ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev, - unsigned int nslabs, u64 phys_limit, gfp_t gfp) + unsigned long minslabs, unsigned long nslabs, + unsigned int nareas, u64 phys_limit, gfp_t gfp) { struct io_tlb_pool *pool; + unsigned int slot_order; struct page *tlb; size_t pool_size; size_t tlb_size; - pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), 1) + - array_size(sizeof(*pool->slots), nslabs); + pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas); pool = kzalloc(pool_size, gfp); if (!pool) goto error; pool->areas = (void *)pool + sizeof(*pool); - pool->slots = (void *)pool->areas + sizeof(*pool->areas); tlb_size = nslabs << IO_TLB_SHIFT; - tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp); - if (!tlb) - goto error_tlb; + while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) { + if (nslabs <= minslabs) + goto error_tlb; + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); + nareas = limit_nareas(nareas, nslabs); + tlb_size = nslabs << IO_TLB_SHIFT; + } - swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, 1); + slot_order = get_order(array_size(sizeof(*pool->slots), nslabs)); + pool->slots = (struct io_tlb_slot *) + __get_free_pages(gfp, slot_order); + if (!pool->slots) + goto error_slots; + + swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, nareas); return pool; +error_slots: + swiotlb_free_tlb(page_address(tlb), tlb_size); error_tlb: kfree(pool); error: return NULL; } +/** + * swiotlb_dyn_alloc() - dynamic memory pool allocation worker + * @work: Pointer to dyn_alloc in struct io_tlb_mem. + */ +static void swiotlb_dyn_alloc(struct work_struct *work) +{ + struct io_tlb_mem *mem = + container_of(work, struct io_tlb_mem, dyn_alloc); + struct io_tlb_pool *pool; + + pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs, + default_nareas, mem->phys_limit, GFP_KERNEL); + if (!pool) { + pr_warn_ratelimited("Failed to allocate new pool"); + return; + } + + add_mem_pool(mem, pool); + + /* Pairs with smp_rmb() in swiotlb_find_pool(). */ + smp_wmb(); +} + /** * swiotlb_dyn_free() - RCU callback to free a memory pool * @rcu: RCU head in the corresponding struct io_tlb_pool. @@ -670,8 +741,10 @@ error: static void swiotlb_dyn_free(struct rcu_head *rcu) { struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu); + size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); size_t tlb_size = pool->end - pool->start; + free_pages((unsigned long)pool->slots, get_order(slots_size)); swiotlb_free_tlb(pool->vaddr, tlb_size); kfree(pool); } @@ -689,15 +762,19 @@ static void swiotlb_dyn_free(struct rcu_head *rcu) struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - struct io_tlb_pool *pool = &mem->defpool; - - if (paddr >= pool->start && paddr < pool->end) - return pool; + struct io_tlb_pool *pool; - /* Pairs with smp_wmb() in swiotlb_find_slots(). */ + /* Pairs with smp_wmb() in swiotlb_find_slots() and + * swiotlb_dyn_alloc(), which modify the RCU lists. + */ smp_rmb(); rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) { + if (paddr >= pool->start && paddr < pool->end) + goto out; + } + list_for_each_entry_rcu(pool, &dev->dma_io_tlb_pools, node) { if (paddr >= pool->start && paddr < pool->end) goto out; @@ -1046,18 +1123,24 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, u64 phys_limit; int index; - pool = &mem->defpool; - index = swiotlb_pool_find_slots(dev, pool, orig_addr, - alloc_size, alloc_align_mask); - if (index >= 0) - goto found; - + rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) { + index = swiotlb_pool_find_slots(dev, pool, orig_addr, + alloc_size, alloc_align_mask); + if (index >= 0) { + rcu_read_unlock(); + goto found; + } + } + rcu_read_unlock(); if (!mem->can_grow) return -1; + schedule_work(&mem->dyn_alloc); + nslabs = nr_slots(alloc_size); phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit); - pool = swiotlb_alloc_pool(dev, nslabs, phys_limit, + pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit, GFP_NOWAIT | __GFP_NOWARN); if (!pool) return -1; @@ -1141,7 +1224,19 @@ static unsigned long mem_pool_used(struct io_tlb_pool *pool) */ static unsigned long mem_used(struct io_tlb_mem *mem) { +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct io_tlb_pool *pool; + unsigned long used = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) + used += mem_pool_used(pool); + rcu_read_unlock(); + + return used; +#else return mem_pool_used(&mem->defpool); +#endif } #endif /* CONFIG_DEBUG_FS */ @@ -1562,7 +1657,10 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, false, nareas); mem->force_bounce = true; mem->for_alloc = true; - mem->nslabs = nslabs; +#ifdef CONFIG_SWIOTLB_DYNAMIC + spin_lock_init(&mem->lock); +#endif + add_mem_pool(mem, pool); rmem->priv = mem; -- cgit From 1395706a14904f2593debecf20f827e72d7392a7 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:24:04 +0200 Subject: swiotlb: search the software IO TLB only if the device makes use of it Skip searching the software IO TLB if a device has never used it, making sure these devices are not affected by the introduction of multiple IO TLB memory pools. Additional memory barrier is required to ensure that the new value of the flag is visible to other CPUs after mapping a new bounce buffer. For efficiency, the flag check should be inlined, and then the memory barrier must be moved to is_swiotlb_buffer(). However, it can replace the existing barrier in swiotlb_find_pool(), because all callers use is_swiotlb_buffer() first to verify that the buffer address belongs to the software IO TLB. Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/device.h | 2 ++ include/linux/swiotlb.h | 7 ++++++- kernel/dma/swiotlb.c | 14 ++++++-------- 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 5fd89c9d005c..6fc808d22bfd 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -628,6 +628,7 @@ struct device_physical_location { * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. * @dma_io_tlb_pools: List of transient swiotlb memory pools. * @dma_io_tlb_lock: Protects changes to the list of active pools. + * @dma_uses_io_tlb: %true if device has used the software IO TLB. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -737,6 +738,7 @@ struct device { #ifdef CONFIG_SWIOTLB_DYNAMIC struct list_head dma_io_tlb_pools; spinlock_t dma_io_tlb_lock; + bool dma_uses_io_tlb; #endif /* arch specific additions */ struct dev_archdata archdata; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 8371c92a0271..b4536626f8ff 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -172,8 +172,13 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) if (!mem) return false; - if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) + if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) { + /* Pairs with smp_wmb() in swiotlb_find_slots() and + * swiotlb_dyn_alloc(), which modify the RCU lists. + */ + smp_rmb(); return swiotlb_find_pool(dev, paddr); + } return paddr >= mem->defpool.start && paddr < mem->defpool.end; } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index adf80dec42d7..d7eac84f975b 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -730,7 +730,7 @@ static void swiotlb_dyn_alloc(struct work_struct *work) add_mem_pool(mem, pool); - /* Pairs with smp_rmb() in swiotlb_find_pool(). */ + /* Pairs with smp_rmb() in is_swiotlb_buffer(). */ smp_wmb(); } @@ -764,11 +764,6 @@ struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; - /* Pairs with smp_wmb() in swiotlb_find_slots() and - * swiotlb_dyn_alloc(), which modify the RCU lists. - */ - smp_rmb(); - rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) { if (paddr >= pool->start && paddr < pool->end) @@ -813,6 +808,7 @@ void swiotlb_dev_init(struct device *dev) #ifdef CONFIG_SWIOTLB_DYNAMIC INIT_LIST_HEAD(&dev->dma_io_tlb_pools); spin_lock_init(&dev->dma_io_tlb_lock); + dev->dma_uses_io_tlb = false; #endif } @@ -1157,9 +1153,11 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); - /* Pairs with smp_rmb() in swiotlb_find_pool(). */ - smp_wmb(); found: + dev->dma_uses_io_tlb = true; + /* Pairs with smp_rmb() in is_swiotlb_buffer() */ + smp_wmb(); + *retpool = pool; return index; } -- cgit