anolis: phytium: pswiotlb: Add PSWIOTLB mechanism to improve DMA performance
ANBZ: #21762 This patch added additional "memory copy" to improve D2H direction DMA performance on Phytium Server SoCs. Signed-off-by: Cui Chao <cuichao1753@phytium.com.cn> Signed-off-by: Jiakun Shuai <shuaijiakun1288@phytium.com.cn> Reviewed-by: Jay chen <jkchen@linux.alibaba.com> Link: https://gitee.com/anolis/cloud-kernel/pulls/5432
This commit is contained in:
parent
fb985e3d6f
commit
8cbb260f55
|
@ -31,6 +31,9 @@
|
|||
#include <linux/hugetlb.h>
|
||||
#include <linux/acpi_iort.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
#include <linux/pswiotlb.h>
|
||||
#endif
|
||||
|
||||
#include <asm/boot.h>
|
||||
#include <asm/fixmap.h>
|
||||
|
@ -636,6 +639,13 @@ void __init mem_init(void)
|
|||
|
||||
set_max_mapnr(max_pfn - PHYS_PFN_OFFSET);
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
/* enable pswiotlb default */
|
||||
if ((pswiotlb_force_disable != true) &&
|
||||
is_phytium_ps_socs())
|
||||
pswiotlb_init(1, PSWIOTLB_VERBOSE);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_SPARSEMEM_VMEMMAP
|
||||
free_unused_memmap();
|
||||
#endif
|
||||
|
|
|
@ -28,6 +28,9 @@
|
|||
#include <linux/sched/signal.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/sysfs.h>
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
#include <linux/pswiotlb.h>
|
||||
#endif
|
||||
|
||||
#include "base.h"
|
||||
#include "power/power.h"
|
||||
|
@ -2459,6 +2462,11 @@ void device_initialize(struct device *dev)
|
|||
INIT_LIST_HEAD(&dev->dev_msi_list);
|
||||
dev->msi_last_list = &dev->msi_list;
|
||||
dev->dev_msi_last_list = &dev->dev_msi_list;
|
||||
#endif
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if ((pswiotlb_force_disable != true) &&
|
||||
is_phytium_ps_socs())
|
||||
pswiotlb_dev_init(dev);
|
||||
#endif
|
||||
INIT_LIST_HEAD(&dev->links.consumers);
|
||||
INIT_LIST_HEAD(&dev->links.suppliers);
|
||||
|
|
|
@ -34,6 +34,9 @@
|
|||
#ifdef CONFIG_MACH_LOONGSON64
|
||||
#include <linux/suspend.h>
|
||||
#endif
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
#include <linux/pswiotlb.h>
|
||||
#endif
|
||||
#include "pci.h"
|
||||
|
||||
DEFINE_MUTEX(pci_slot_mutex);
|
||||
|
@ -4343,6 +4346,15 @@ void __weak pcibios_set_master(struct pci_dev *dev)
|
|||
*/
|
||||
void pci_set_master(struct pci_dev *dev)
|
||||
{
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if ((pswiotlb_force_disable != true) &&
|
||||
is_phytium_ps_socs()) {
|
||||
dev->dev.can_use_pswiotlb = pswiotlb_is_dev_in_passthroughlist(dev);
|
||||
dev_info(&dev->dev, "The device %s use pswiotlb because vendor 0x%04x %s in pswiotlb passthroughlist\n",
|
||||
dev->dev.can_use_pswiotlb ? "would" : "would NOT",
|
||||
dev->vendor, dev->dev.can_use_pswiotlb ? "is NOT" : "is");
|
||||
}
|
||||
#endif
|
||||
__pci_set_master(dev, true);
|
||||
pcibios_set_master(dev);
|
||||
}
|
||||
|
|
|
@ -19,6 +19,9 @@
|
|||
#include <linux/hypervisor.h>
|
||||
#include <linux/irqdomain.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
#include <linux/pswiotlb.h>
|
||||
#endif
|
||||
#include "pci.h"
|
||||
|
||||
#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
|
||||
|
@ -2489,7 +2492,13 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
|
|||
|
||||
dma_set_max_seg_size(&dev->dev, 65536);
|
||||
dma_set_seg_boundary(&dev->dev, 0xffffffff);
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if ((pswiotlb_force_disable != true) &&
|
||||
is_phytium_ps_socs()) {
|
||||
pswiotlb_store_local_node(dev, bus);
|
||||
dma_set_seg_boundary(&dev->dev, 0xffffffffffff);
|
||||
}
|
||||
#endif
|
||||
/* Fix up broken headers */
|
||||
pci_fixup_device(pci_fixup_header, dev);
|
||||
|
||||
|
|
|
@ -423,6 +423,8 @@ struct dev_links_info {
|
|||
* @dma_pools: Dma pools (if dma'ble device).
|
||||
* @dma_mem: Internal for coherent mem override.
|
||||
* @cma_area: Contiguous memory area for dma allocations
|
||||
* @dma_p_io_tlb_mem: Phytium Software IO TLB allocator. Not for driver use.
|
||||
* @dma_uses_p_io_tlb: %true if device has used the Phytium software IO TLB.
|
||||
* @archdata: For arch-specific additions.
|
||||
* @of_node: Associated device tree node.
|
||||
* @fwnode: Associated device node supplied by platform firmware.
|
||||
|
@ -509,6 +511,11 @@ struct device {
|
|||
struct list_head dev_msi_list;
|
||||
struct list_head *dev_msi_last_list;
|
||||
#endif
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
struct p_io_tlb_mem *dma_p_io_tlb_mem;
|
||||
bool dma_uses_p_io_tlb;
|
||||
bool can_use_pswiotlb;
|
||||
#endif
|
||||
#ifdef CONFIG_DMA_OPS
|
||||
const struct dma_map_ops *dma_ops;
|
||||
#endif
|
||||
|
@ -541,6 +548,9 @@ struct device {
|
|||
|
||||
#ifdef CONFIG_NUMA
|
||||
int numa_node; /* NUMA node this device is close to */
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
int local_node; /* NUMA node this device is really belong to */
|
||||
#endif
|
||||
#endif
|
||||
dev_t devt; /* dev_t, creates the sysfs "dev" */
|
||||
u32 id; /* device instance */
|
||||
|
|
|
@ -200,6 +200,12 @@ enum pageflags {
|
|||
/* For self-hosted memmap pages */
|
||||
PG_vmemmap_self_hosted = PG_owner_priv_1,
|
||||
#endif
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
/* check if pswiotlb is sync already */
|
||||
PG_pswiotlbsync = __NR_PAGEFLAGS + 1,
|
||||
/* check if the page is used for pswiotlb */
|
||||
PG_pswiotlb,
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifndef __GENERATING_BOUNDS_H
|
||||
|
|
|
@ -0,0 +1,366 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __LINUX_PSWIOTLB_H
|
||||
#define __LINUX_PSWIOTLB_H
|
||||
|
||||
#include <linux/device.h>
|
||||
#include <linux/dma-direction.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/arm-smccc.h>
|
||||
|
||||
struct device;
|
||||
struct page;
|
||||
struct scatterlist;
|
||||
extern bool pswiotlb_force_disable;
|
||||
struct p_io_tlb_pool;
|
||||
|
||||
#define SOC_ID_PS23064 0x8
|
||||
#define SOC_ID_PS24080 0x6
|
||||
#define MIDR_PS 0x700F8620
|
||||
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
|
||||
#define PSWIOTLB_VERBOSE (1 << 0) /* verbose initialization */
|
||||
#define PSWIOTLB_FORCEOFF (1 << 1) /* force phytium bounce buffering off*/
|
||||
#define PSWIOTLB_ANY (1 << 2) /* allow any memory for the buffer */
|
||||
#define PSWIOTLB_FREE_THRESHOLD 30
|
||||
static bool is_ps_socs;
|
||||
|
||||
/*
|
||||
* Maximum allowable number of contiguous slabs to map,
|
||||
* must be a power of 2. What is the appropriate value ?
|
||||
* The complexity of {map,unmap}_single is linearly dependent on this value.
|
||||
*/
|
||||
#define P_IO_TLB_SEGSIZE 1024
|
||||
|
||||
/*
|
||||
* log of the size of each Phytium IO TLB slab. The number of slabs is command line
|
||||
* controllable.
|
||||
*/
|
||||
#define P_IO_TLB_SHIFT 11
|
||||
#define P_IO_TLB_SIZE (1 << P_IO_TLB_SHIFT)
|
||||
|
||||
/* default to 256MB */
|
||||
#define P_IO_TLB_DEFAULT_SIZE (256UL<<20)
|
||||
#define P_IO_TLB_INC_THR (64UL<<20)
|
||||
#define P_IO_TLB_EXT_WATERMARK (80)
|
||||
|
||||
/* passthroughlist which incompatible with pswiotlb temporarily */
|
||||
#define BL_PCI_VENDOR_ID_NVIDIA 0x10de
|
||||
#define BL_PCI_VENDOR_ID_ILUVATAR 0x1E3E
|
||||
#define BL_PCI_VENDOR_ID_METAX 0x9999
|
||||
|
||||
unsigned long pswiotlb_size_or_default(void);
|
||||
void __init pswiotlb_init_remap(bool addressing_limit, int nid, unsigned int flags,
|
||||
int (*remap)(void *tlb, unsigned long nslabs));
|
||||
|
||||
phys_addr_t pswiotlb_tbl_map_single(struct device *hwdev, int nid, phys_addr_t phys,
|
||||
size_t mapping_size, size_t alloc_size, unsigned int alloc_align_mask,
|
||||
enum dma_data_direction dir,
|
||||
unsigned long attrs);
|
||||
|
||||
extern void pswiotlb_tbl_unmap_single(struct device *hwdev,
|
||||
int nid,
|
||||
phys_addr_t tlb_addr,
|
||||
size_t offset,
|
||||
size_t mapping_size,
|
||||
enum dma_data_direction dir,
|
||||
unsigned long attrs,
|
||||
struct p_io_tlb_pool *pool);
|
||||
|
||||
void pswiotlb_sync_single_for_device(struct device *dev, int nid, phys_addr_t tlb_addr,
|
||||
size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool);
|
||||
void pswiotlb_sync_single_for_cpu(struct device *dev, int nid, phys_addr_t tlb_addr,
|
||||
size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool);
|
||||
dma_addr_t pswiotlb_map(struct device *dev, int nid, phys_addr_t phys,
|
||||
size_t size, enum dma_data_direction dir, unsigned long attrs);
|
||||
void pswiotlb_store_local_node(struct pci_dev *dev, struct pci_bus *bus);
|
||||
void iommu_dma_unmap_sg_pswiotlb(struct device *dev, struct scatterlist *sg, unsigned long iova,
|
||||
size_t mapped, int nents, enum dma_data_direction dir, unsigned long attrs);
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
struct pswiotlb_passthroughlist {
|
||||
struct list_head node;
|
||||
unsigned short vendor;
|
||||
unsigned short device;
|
||||
bool from_grub;
|
||||
};
|
||||
|
||||
struct pswiotlb_bypass_rules {
|
||||
unsigned short vendor_id;
|
||||
bool dma_is_sg;
|
||||
enum dma_data_direction dir;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct p_io_tlb_pool - Phytium IO TLB memory pool descriptor
|
||||
* @start: The start address of the pswiotlb memory pool. Used to do a quick
|
||||
* range check to see if the memory was in fact allocated by this
|
||||
* API.
|
||||
* @end: The end address of the pswiotlb memory pool. Used to do a quick
|
||||
* range check to see if the memory was in fact allocated by this
|
||||
* API.
|
||||
* @nslabs: The number of Phytium IO TLB blocks (in groups of 64) between @start and
|
||||
* @end. For default pswiotlb, this is command line adjustable via
|
||||
* setup_io_tlb_npages.
|
||||
* @used: The number of used Phytium IO TLB block.
|
||||
* @list: The free list describing the number of free entries available
|
||||
* from each index.
|
||||
* @index: The index to start searching in the next round.
|
||||
* @orig_addr: The original address corresponding to a mapped entry.
|
||||
* @alloc_size: Size of the allocated buffer.
|
||||
* @lock: The lock to protect the above data structures in the map and
|
||||
* unmap calls.
|
||||
* @vaddr: The vaddr of the pswiotlb memory pool. The pswiotlb memory pool
|
||||
* may be remapped in the memory encrypted case and store virtual
|
||||
* address for bounce buffer operation.
|
||||
* @nslabs: The number of Phytium IO TLB slots between @start and @end. For the
|
||||
* default pswiotlb, this can be adjusted with a boot parameter,
|
||||
* see setup_io_tlb_npages().
|
||||
* @late_alloc: %true if allocated using the page allocator.
|
||||
* @nareas: Number of areas in the pool.
|
||||
* @area_nslabs: Number of slots in each area.
|
||||
* @areas: Array of memory area descriptors.
|
||||
* @slots: Array of slot descriptors.
|
||||
* @node: Member of the Phytium IO TLB memory pool list.
|
||||
* @rcu: RCU head for pswiotlb_dyn_free().
|
||||
* @transient: %true if transient memory pool.
|
||||
* @busy_flag: %true if the pool is used by devices.
|
||||
* @free_cnt: Counters every time the pool is free when checked by monitor.
|
||||
* @free_th: Free threshold determine when to free the pool to memory.
|
||||
* @busy_recode: Bitmap to record the busy status of the areas in the pool.
|
||||
* @node_min_addr: Minimum physical address of the numa node.
|
||||
* @numa_max_addr: Maximum physical address of the numa node.
|
||||
* @numa_node_id: Numa node id the pool belong to.
|
||||
*/
|
||||
struct p_io_tlb_pool {
|
||||
phys_addr_t start;
|
||||
phys_addr_t end;
|
||||
void *vaddr;
|
||||
unsigned long nslabs;
|
||||
bool late_alloc;
|
||||
unsigned int nareas;
|
||||
unsigned int area_nslabs;
|
||||
struct p_io_tlb_area *areas;
|
||||
struct p_io_tlb_slot *slots;
|
||||
struct list_head node;
|
||||
struct rcu_head rcu;
|
||||
bool transient;
|
||||
bool busy_flag;
|
||||
unsigned int free_cnt;
|
||||
unsigned int free_th;
|
||||
unsigned long *busy_record;
|
||||
phys_addr_t node_min_addr;
|
||||
phys_addr_t node_max_addr;
|
||||
int numa_node_id;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct p_io_tlb_mem - Phytium Software IO TLB allocator
|
||||
* @defpool: Default (initial) Phytium IO TLB memory pool descriptor.
|
||||
* @pool: Phytium IO TLB memory pool descriptor (if not dynamic).
|
||||
* @nslabs: Total number of Phytium IO TLB slabs in all pools.
|
||||
* @debugfs: The dentry to debugfs.
|
||||
* @force_bounce: %true if pswiotlb bouncing is forced
|
||||
* @for_alloc: %true if the pool is used for memory allocation
|
||||
* @can_grow: %true if more pools can be allocated dynamically.
|
||||
* @phys_limit: Maximum allowed physical address.
|
||||
* @pool_addr: Array where all the pools stored.
|
||||
* @capacity: Number of pools which could be allocated.
|
||||
* @whole_size: Number of pools which stored in the pool array.
|
||||
* @lock: Lock to synchronize changes to the list.
|
||||
* @pools: List of Phytium IO TLB memory pool descriptors (if dynamic).
|
||||
* @dyn_alloc: Dynamic Phytium IO TLB pool allocation work.
|
||||
* @total_used: The total number of slots in the pool that are currently used
|
||||
* across all areas. Used only for calculating used_hiwater in
|
||||
* debugfs.
|
||||
* @used_hiwater: The high water mark for total_used. Used only for reporting
|
||||
* in debugfs.
|
||||
* @node_min_addr: Minimum physical address of the numa node.
|
||||
* @numa_max_addr: Maximum physical address of the numa node.
|
||||
* @numa_node_id: Numa node id the mem belong to.
|
||||
*/
|
||||
struct p_io_tlb_mem {
|
||||
struct p_io_tlb_pool defpool;
|
||||
unsigned long nslabs;
|
||||
struct dentry *debugfs;
|
||||
bool force_bounce;
|
||||
bool for_alloc;
|
||||
bool can_grow;
|
||||
u64 phys_limit;
|
||||
struct p_io_tlb_pool *pool_addr[64*1024/8];
|
||||
int capacity;
|
||||
int whole_size;
|
||||
spinlock_t lock;
|
||||
struct list_head pools;
|
||||
struct work_struct dyn_alloc;
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
atomic_long_t total_used;
|
||||
atomic_long_t used_hiwater;
|
||||
#endif
|
||||
phys_addr_t node_min_addr;
|
||||
phys_addr_t node_max_addr;
|
||||
unsigned long node_total_mem;
|
||||
int numa_node_id;
|
||||
};
|
||||
|
||||
extern struct p_io_tlb_mem p_io_tlb_default_mem[MAX_NUMNODES];
|
||||
|
||||
struct p_io_tlb_pool *pswiotlb_find_pool(struct device *dev, int nid, phys_addr_t paddr);
|
||||
|
||||
static inline bool is_phytium_ps_socs(void)
|
||||
{
|
||||
unsigned int soc_id;
|
||||
unsigned int midr;
|
||||
|
||||
if (likely(is_ps_socs))
|
||||
return true;
|
||||
|
||||
soc_id = read_sysreg_s(SYS_AIDR_EL1);
|
||||
midr = read_cpuid_id();
|
||||
if ((soc_id == SOC_ID_PS23064 || soc_id == SOC_ID_PS24080)
|
||||
&& midr == MIDR_PS) {
|
||||
is_ps_socs = true;
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool is_pswiotlb_buffer(struct device *dev, int nid, phys_addr_t paddr,
|
||||
struct p_io_tlb_pool **pool)
|
||||
{
|
||||
struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid];
|
||||
struct page *page;
|
||||
|
||||
if (!paddr || (paddr == DMA_MAPPING_ERROR))
|
||||
return false;
|
||||
|
||||
page = pfn_to_page(PFN_DOWN(paddr));
|
||||
|
||||
if (test_bit(PG_pswiotlb, &page->flags) == false)
|
||||
return false;
|
||||
|
||||
if (!mem)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* All PSWIOTLB buffer addresses must have been returned by
|
||||
* pswiotlb_tbl_map_single() and passed to a device driver.
|
||||
* If a PSWIOTLB address is checked on another CPU, then it was
|
||||
* presumably loaded by the device driver from an unspecified private
|
||||
* data structure. Make sure that this load is ordered before reading
|
||||
* dev->dma_uses_p_io_tlb here and mem->pools in pswiotlb_find_pool().
|
||||
*
|
||||
* This barrier pairs with smp_mb() in pswiotlb_find_slots().
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
*pool = pswiotlb_find_pool(dev, nid, paddr);
|
||||
if (READ_ONCE(dev->dma_uses_p_io_tlb) && *pool)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool dma_is_in_local_node(struct device *dev, int nid, dma_addr_t addr, size_t size)
|
||||
{
|
||||
dma_addr_t end = addr + size - 1;
|
||||
struct p_io_tlb_mem *mem = &p_io_tlb_default_mem[nid];
|
||||
|
||||
if (addr >= mem->node_min_addr && end <= mem->node_max_addr)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void pswiotlb_init(bool addressing_limited, unsigned int flags);
|
||||
void pswiotlb_dev_init(struct device *dev);
|
||||
size_t pswiotlb_max_mapping_size(struct device *dev);
|
||||
bool is_pswiotlb_allocated(struct device *dev);
|
||||
bool is_pswiotlb_active(struct device *dev);
|
||||
void __init pswiotlb_adjust_size(unsigned long size);
|
||||
phys_addr_t default_pswiotlb_base(struct device *dev);
|
||||
phys_addr_t default_pswiotlb_limit(struct device *dev);
|
||||
bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev);
|
||||
|
||||
extern const struct pswiotlb_bypass_rules bypass_rules_list[];
|
||||
static inline bool pswiotlb_bypass_is_needed(struct device *dev, int nelems,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
bool dma_is_sg = nelems ? true : false;
|
||||
const struct pswiotlb_bypass_rules *list = bypass_rules_list;
|
||||
|
||||
while (list->vendor_id) {
|
||||
if ((pdev->vendor == list->vendor_id) &&
|
||||
(dma_is_sg == list->dma_is_sg) &&
|
||||
(dir == list->dir))
|
||||
return true;
|
||||
list++;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void pswiotlb_init(bool addressing_limited, unsigned int flags)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void pswiotlb_dev_init(struct device *dev)
|
||||
{
|
||||
}
|
||||
static inline bool is_pswiotlb_buffer(struct device *dev, int nid, phys_addr_t paddr,
|
||||
struct p_io_tlb_pool **pool)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool dma_is_in_local_node(struct device *dev, int nid, dma_addr_t addr, size_t size)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline size_t pswiotlb_max_mapping_size(struct device *dev)
|
||||
{
|
||||
return SIZE_MAX;
|
||||
}
|
||||
|
||||
static inline bool is_pswiotlb_allocated(struct device *dev)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool is_pswiotlb_active(struct device *dev)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void pswiotlb_adjust_size(unsigned long size)
|
||||
{
|
||||
}
|
||||
|
||||
static inline phys_addr_t default_pswiotlb_base(struct device *dev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline phys_addr_t default_pswiotlb_limit(struct device *dev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool pswiotlb_bypass_is_needed(struct device *dev, int nelems,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif /* CONFIG_PSWIOTLB */
|
||||
|
||||
extern void pswiotlb_print_info(int);
|
||||
extern bool pswiotlb_dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size);
|
||||
|
||||
#endif /* __LINUX_PSWIOTLB_H */
|
|
@ -0,0 +1,44 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM pswiotlb
|
||||
|
||||
#if !defined(_TRACE_PSWIOTLB_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_PSWIOTLB_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
TRACE_EVENT(pswiotlb_bounced,
|
||||
|
||||
TP_PROTO(struct device *dev,
|
||||
dma_addr_t dev_addr,
|
||||
size_t size),
|
||||
|
||||
TP_ARGS(dev, dev_addr, size),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__string(dev_name, dev_name(dev))
|
||||
__field(u64, dma_mask)
|
||||
__field(dma_addr_t, dev_addr)
|
||||
__field(size_t, size)
|
||||
__field(bool, force)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__assign_str(dev_name, dev_name(dev));
|
||||
__entry->dma_mask = (dev->dma_mask ? *dev->dma_mask : 0);
|
||||
__entry->dev_addr = dev_addr;
|
||||
__entry->size = size;
|
||||
),
|
||||
|
||||
TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx size=%zu %s",
|
||||
__get_str(dev_name),
|
||||
__entry->dma_mask,
|
||||
(unsigned long long)__entry->dev_addr,
|
||||
__entry->size,
|
||||
__entry->force ? "NORMAL" : "FORCEOFF")
|
||||
);
|
||||
|
||||
#endif /* _TRACE_PSWIOTLB_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
#include <trace/define_trace.h>
|
|
@ -235,3 +235,5 @@ config DMA_API_DEBUG_SG
|
|||
is technically out-of-spec.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
source "kernel/dma/phytium/Kconfig"
|
||||
|
|
|
@ -8,5 +8,6 @@ obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o
|
|||
obj-$(CONFIG_DMA_VIRT_OPS) += virt.o
|
||||
obj-$(CONFIG_DMA_API_DEBUG) += debug.o
|
||||
obj-$(CONFIG_SWIOTLB) += swiotlb.o
|
||||
obj-$(CONFIG_PSWIOTLB) += phytium/
|
||||
obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o
|
||||
obj-$(CONFIG_DMA_REMAP) += remap.o
|
||||
|
|
|
@ -51,6 +51,10 @@
|
|||
#include <linux/dma-map-ops.h>
|
||||
#include <linux/cma.h>
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
#include "./phytium/pswiotlb-dma.h"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CMA_SIZE_MBYTES
|
||||
#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
|
||||
#else
|
||||
|
@ -309,6 +313,10 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
|
|||
#ifdef CONFIG_DMA_PERNUMA_CMA
|
||||
int nid = dev_to_node(dev);
|
||||
#endif
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if (check_if_pswiotlb_is_applicable(dev))
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
/* CMA can be used only in the context which permits sleeping */
|
||||
if (!gfpflags_allow_blocking(gfp))
|
||||
|
@ -351,6 +359,13 @@ void dma_free_contiguous(struct device *dev, struct page *page, size_t size)
|
|||
{
|
||||
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if (check_if_pswiotlb_is_applicable(dev)) {
|
||||
__free_pages(page, get_order(size));
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
/* if dev has its own cma, free page from there */
|
||||
if (dev->cma_area) {
|
||||
if (cma_release(dev->cma_area, page, count))
|
||||
|
|
|
@ -15,6 +15,9 @@
|
|||
#include <linux/vmalloc.h>
|
||||
#include "debug.h"
|
||||
#include "direct.h"
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
#include "./phytium/pswiotlb-dma.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Managed DMA API
|
||||
|
@ -149,6 +152,13 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
|
|||
if (WARN_ON_ONCE(!dev->dma_mask))
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if (check_if_pswiotlb_is_applicable(dev) &&
|
||||
!pswiotlb_bypass_is_needed(dev, 0, dir)) {
|
||||
addr = pswiotlb_dma_map_page_distribute(dev, page, offset, size, dir, attrs);
|
||||
return addr;
|
||||
}
|
||||
#endif
|
||||
if (dma_map_direct(dev, ops))
|
||||
addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
|
||||
else
|
||||
|
@ -166,6 +176,12 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
|
|||
|
||||
BUG_ON(!valid_dma_direction(dir));
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if (check_if_pswiotlb_is_applicable(dev)) {
|
||||
pswiotlb_dma_unmap_page_attrs_distribute(dev, addr, size, dir, attrs);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (is_zhaoxin_kh40000())
|
||||
patch_p2cw_single_map(dev, addr, dir, ops);
|
||||
|
||||
|
@ -192,6 +208,13 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
|
|||
if (WARN_ON_ONCE(!dev->dma_mask))
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if (check_if_pswiotlb_is_applicable(dev) &&
|
||||
!pswiotlb_bypass_is_needed(dev, nents, dir)) {
|
||||
ents = pswiotlb_dma_map_sg_attrs_distribute(dev, sg, nents, dir, attrs);
|
||||
return ents;
|
||||
}
|
||||
#endif
|
||||
if (dma_map_direct(dev, ops))
|
||||
ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
|
||||
else
|
||||
|
@ -212,6 +235,12 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
|
|||
BUG_ON(!valid_dma_direction(dir));
|
||||
debug_dma_unmap_sg(dev, sg, nents, dir);
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if (check_if_pswiotlb_is_applicable(dev)) {
|
||||
pswiotlb_dma_unmap_sg_attrs_distribute(dev, sg, nents, dir, attrs);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (is_zhaoxin_kh40000())
|
||||
patch_p2cw_sg_map(dev, sg, nents, dir, ops);
|
||||
|
||||
|
@ -266,6 +295,12 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
|
|||
|
||||
BUG_ON(!valid_dma_direction(dir));
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if (check_if_pswiotlb_is_applicable(dev)) {
|
||||
pswiotlb_dma_sync_single_for_cpu_distribute(dev, addr, size, dir);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (is_zhaoxin_kh40000())
|
||||
patch_p2cw_single_map(dev, addr, dir, ops);
|
||||
|
||||
|
@ -283,6 +318,12 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
|
|||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
|
||||
BUG_ON(!valid_dma_direction(dir));
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if (check_if_pswiotlb_is_applicable(dev)) {
|
||||
pswiotlb_dma_sync_single_for_device_distribute(dev, addr, size, dir);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (dma_map_direct(dev, ops))
|
||||
dma_direct_sync_single_for_device(dev, addr, size, dir);
|
||||
else if (ops->sync_single_for_device)
|
||||
|
@ -298,9 +339,14 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
|
|||
|
||||
BUG_ON(!valid_dma_direction(dir));
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if (check_if_pswiotlb_is_applicable(dev)) {
|
||||
pswiotlb_dma_sync_sg_for_cpu_distribute(dev, sg, nelems, dir);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (is_zhaoxin_kh40000())
|
||||
patch_p2cw_sg_map(dev, sg, nelems, dir, ops);
|
||||
|
||||
if (dma_map_direct(dev, ops))
|
||||
dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir);
|
||||
else if (ops->sync_sg_for_cpu)
|
||||
|
@ -315,6 +361,12 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
|
|||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
|
||||
BUG_ON(!valid_dma_direction(dir));
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
if (check_if_pswiotlb_is_applicable(dev)) {
|
||||
pswiotlb_dma_sync_sg_for_device_distribute(dev, sg, nelems, dir);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (dma_map_direct(dev, ops))
|
||||
dma_direct_sync_sg_for_device(dev, sg, nelems, dir);
|
||||
else if (ops->sync_sg_for_device)
|
||||
|
@ -442,6 +494,9 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
|
|||
|
||||
WARN_ON_ONCE(!dev->coherent_dma_mask);
|
||||
|
||||
#ifdef CONFIG_PSWIOTLB
|
||||
check_if_pswiotlb_is_applicable(dev);
|
||||
#endif
|
||||
if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
|
||||
return cpu_addr;
|
||||
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
config PSWIOTLB
|
||||
bool "Phytium software IO TLB"
|
||||
select NEED_DMA_MAP_STATE
|
||||
depends on ARCH_PHYTIUM && NUMA
|
|
@ -0,0 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
obj-$(CONFIG_PSWIOTLB) += pswiotlb.o
|
||||
obj-$(CONFIG_PSWIOTLB) += pswiotlb-mapping.o
|
||||
obj-$(CONFIG_PSWIOTLB) += pswiotlb-direct.o
|
||||
obj-$(CONFIG_PSWIOTLB) += pswiotlb-iommu.o
|
|
@ -0,0 +1,148 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* DMA operations based on Phytium software IO tlb that
|
||||
* map physical memory directly without using an IOMMU.
|
||||
*
|
||||
* Copyright (c) 2024, Phytium Technology Co., Ltd.
|
||||
*/
|
||||
#include <linux/memblock.h> /* for max_pfn */
|
||||
#include <linux/export.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/dma-map-ops.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/pfn.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/set_memory.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/pswiotlb.h>
|
||||
#include "pswiotlb-dma.h"
|
||||
|
||||
/*
|
||||
* The following functions are ported from
|
||||
* ./drivers/dma/direct.c
|
||||
* static inline dma_addr_t phys_to_dma_direct(struct device *dev,
|
||||
* phys_addr_t phys);
|
||||
*/
|
||||
|
||||
static inline dma_addr_t phys_to_dma_direct(struct device *dev,
|
||||
phys_addr_t phys)
|
||||
{
|
||||
if (force_dma_unencrypted(dev))
|
||||
return phys_to_dma_unencrypted(dev, phys);
|
||||
return phys_to_dma(dev, phys);
|
||||
}
|
||||
|
||||
bool pswiotlb_dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
|
||||
{
|
||||
dma_addr_t dma_addr = phys_to_dma_direct(dev, phys);
|
||||
|
||||
if (dma_addr == DMA_MAPPING_ERROR)
|
||||
return false;
|
||||
return dma_addr + size - 1 <=
|
||||
min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
|
||||
defined(CONFIG_PSWIOTLB)
|
||||
void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev,
|
||||
struct scatterlist *sgl, int nents, enum dma_data_direction dir)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
int nid = dev->numa_node;
|
||||
struct p_io_tlb_pool *pool;
|
||||
|
||||
for_each_sg(sgl, sg, nents, i) {
|
||||
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
|
||||
|
||||
if (unlikely(is_swiotlb_buffer(paddr)))
|
||||
swiotlb_tbl_sync_single(dev, paddr, sg->length, dir, SYNC_FOR_DEVICE);
|
||||
//swiotlb_sync_single_for_device(dev, paddr, sg->length,
|
||||
// dir);
|
||||
|
||||
if (is_pswiotlb_active(dev) &&
|
||||
unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool)))
|
||||
pswiotlb_sync_single_for_device(dev, nid, paddr,
|
||||
sg->length, dir, pool);
|
||||
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_for_device(paddr, sg->length,
|
||||
dir);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
|
||||
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
|
||||
defined(CONFIG_PSWIOTLB)
|
||||
void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev,
|
||||
struct scatterlist *sgl, int nents, enum dma_data_direction dir)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
int nid = dev->numa_node;
|
||||
struct p_io_tlb_pool *pool;
|
||||
|
||||
for_each_sg(sgl, sg, nents, i) {
|
||||
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
|
||||
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_for_cpu(paddr, sg->length, dir);
|
||||
|
||||
if (unlikely(is_swiotlb_buffer(paddr)))
|
||||
swiotlb_tbl_sync_single(dev, paddr, sg->length, dir, SYNC_FOR_CPU);
|
||||
//swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
|
||||
// dir);
|
||||
|
||||
if (is_pswiotlb_active(dev) &&
|
||||
unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool)))
|
||||
pswiotlb_sync_single_for_cpu(dev, nid, paddr,
|
||||
sg->length, dir, pool);
|
||||
|
||||
if (dir == DMA_FROM_DEVICE)
|
||||
arch_dma_mark_clean(paddr, sg->length);
|
||||
}
|
||||
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_for_cpu_all();
|
||||
}
|
||||
|
||||
/*
|
||||
* Unmaps segments, except for ones marked as pci_p2pdma which do not
|
||||
* require any further action as they contain a bus address.
|
||||
*/
|
||||
void pswiotlb_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
|
||||
for_each_sg(sgl, sg, nents, i)
|
||||
pswiotlb_dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
|
||||
attrs);
|
||||
}
|
||||
#endif
|
||||
|
||||
int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
|
||||
enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
int i, ret;
|
||||
|
||||
for_each_sg(sgl, sg, nents, i) {
|
||||
sg->dma_address = pswiotlb_dma_direct_map_page(dev, sg_page(sg),
|
||||
sg->offset, sg->length, dir, attrs);
|
||||
if (sg->dma_address == DMA_MAPPING_ERROR) {
|
||||
ret = -EIO;
|
||||
goto out_unmap;
|
||||
}
|
||||
sg_dma_len(sg) = sg->length;
|
||||
}
|
||||
|
||||
return nents;
|
||||
|
||||
out_unmap:
|
||||
pswiotlb_dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,212 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* DMA operations based on Phytium software IO tlb that
|
||||
* map physical memory.
|
||||
*
|
||||
* Copyright (c) 2024, Phytium Technology Co., Ltd.
|
||||
*/
|
||||
#ifndef _KERNEL_PSWIOTLB_DMA_DIRECT_H
|
||||
#define _KERNEL_PSWIOTLB_DMA_DIRECT_H
|
||||
|
||||
#include <linux/dma-direct.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/pswiotlb.h>
|
||||
|
||||
extern bool pswiotlb_force_disable;
|
||||
int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
|
||||
enum dma_data_direction dir, unsigned long attrs);
|
||||
dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *page,
|
||||
size_t offset, size_t size, enum dma_data_direction dir,
|
||||
unsigned long attrs);
|
||||
void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t addr, size_t size,
|
||||
enum dma_data_direction dir, unsigned long attrs);
|
||||
int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, struct scatterlist *sg,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs);
|
||||
void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, struct scatterlist *sg,
|
||||
int nents, enum dma_data_direction dir,
|
||||
unsigned long attrs);
|
||||
void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, dma_addr_t addr, size_t size,
|
||||
enum dma_data_direction dir);
|
||||
void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir);
|
||||
void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, struct scatterlist *sg,
|
||||
int nelems, enum dma_data_direction dir);
|
||||
void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, struct scatterlist *sg,
|
||||
int nelems, enum dma_data_direction dir);
|
||||
|
||||
dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev, struct page *page,
|
||||
unsigned long offset, size_t size, enum dma_data_direction dir,
|
||||
unsigned long attrs);
|
||||
void pswiotlb_iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
|
||||
size_t size, enum dma_data_direction dir, unsigned long attrs);
|
||||
int pswiotlb_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs);
|
||||
void pswiotlb_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs);
|
||||
void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev,
|
||||
dma_addr_t dma_handle, size_t size, enum dma_data_direction dir);
|
||||
void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev,
|
||||
dma_addr_t dma_handle, size_t size, enum dma_data_direction dir);
|
||||
void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev,
|
||||
struct scatterlist *sgl, int nelems,
|
||||
enum dma_data_direction dir);
|
||||
void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev,
|
||||
struct scatterlist *sgl, int nelems,
|
||||
enum dma_data_direction dir);
|
||||
|
||||
static inline bool check_if_pswiotlb_is_applicable(struct device *dev)
|
||||
{
|
||||
if (dev && dev->can_use_pswiotlb && is_phytium_ps_socs()
|
||||
&& !pswiotlb_force_disable) {
|
||||
if (dev->numa_node == NUMA_NO_NODE ||
|
||||
dev->numa_node != dev->local_node)
|
||||
dev->numa_node = dev->local_node;
|
||||
|
||||
if (dev_is_pci(dev) && (dev->numa_node != NUMA_NO_NODE))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
|
||||
defined(CONFIG_PSWIOTLB)
|
||||
void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
|
||||
int nents, enum dma_data_direction dir);
|
||||
#else
|
||||
static inline void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev,
|
||||
struct scatterlist *sgl, int nents, enum dma_data_direction dir)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
|
||||
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
|
||||
defined(CONFIG_PSWIOTLB)
|
||||
void pswiotlb_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs);
|
||||
void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev,
|
||||
struct scatterlist *sgl, int nents, enum dma_data_direction dir);
|
||||
#else
|
||||
static inline void pswiotlb_dma_direct_unmap_sg(struct device *dev,
|
||||
struct scatterlist *sgl, int nents, enum dma_data_direction dir,
|
||||
unsigned long attrs)
|
||||
{
|
||||
}
|
||||
static inline void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev,
|
||||
struct scatterlist *sgl, int nents, enum dma_data_direction dir)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void pswiotlb_dma_direct_sync_single_for_device(struct device *dev,
|
||||
dma_addr_t addr, size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
phys_addr_t paddr = dma_to_phys(dev, addr);
|
||||
int nid = dev->numa_node;
|
||||
struct p_io_tlb_pool *pool;
|
||||
|
||||
if (unlikely(is_swiotlb_buffer(paddr)))
|
||||
//swiotlb_sync_single_for_device(dev, paddr, size, dir);
|
||||
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
|
||||
|
||||
if (is_pswiotlb_active(dev)) {
|
||||
if (unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool)))
|
||||
pswiotlb_sync_single_for_device(dev, nid, paddr, size, dir, pool);
|
||||
}
|
||||
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_for_device(paddr, size, dir);
|
||||
}
|
||||
|
||||
static inline void pswiotlb_dma_direct_sync_single_for_cpu(struct device *dev,
|
||||
dma_addr_t addr, size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
phys_addr_t paddr = dma_to_phys(dev, addr);
|
||||
int nid = dev->numa_node;
|
||||
struct p_io_tlb_pool *pool;
|
||||
|
||||
if (!dev_is_dma_coherent(dev)) {
|
||||
arch_sync_dma_for_cpu(paddr, size, dir);
|
||||
arch_sync_dma_for_cpu_all();
|
||||
}
|
||||
|
||||
if (unlikely(is_swiotlb_buffer(paddr)))
|
||||
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
|
||||
//swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
|
||||
|
||||
if (is_pswiotlb_active(dev)) {
|
||||
if (unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool)))
|
||||
pswiotlb_sync_single_for_cpu(dev, nid, paddr, size, dir, pool);
|
||||
}
|
||||
|
||||
if (dir == DMA_FROM_DEVICE)
|
||||
arch_dma_mark_clean(paddr, size);
|
||||
}
|
||||
|
||||
static inline dma_addr_t pswiotlb_dma_direct_map_page(struct device *dev,
|
||||
struct page *page, unsigned long offset, size_t size,
|
||||
enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
phys_addr_t phys = page_to_phys(page) + offset;
|
||||
dma_addr_t dma_addr = phys_to_dma(dev, phys);
|
||||
int nid = dev->numa_node;
|
||||
|
||||
if (unlikely(swiotlb_force == SWIOTLB_FORCE))
|
||||
return swiotlb_map(dev, phys, size, dir, attrs);
|
||||
|
||||
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
|
||||
if (swiotlb_force != SWIOTLB_NO_FORCE)
|
||||
return swiotlb_map(dev, phys, size, dir, attrs);
|
||||
|
||||
dev_WARN_ONCE(dev, 1,
|
||||
"DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
|
||||
&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
|
||||
return DMA_MAPPING_ERROR;
|
||||
}
|
||||
|
||||
/* check whether dma addr is in local node */
|
||||
if (is_pswiotlb_active(dev)) {
|
||||
if (dir != DMA_TO_DEVICE) {
|
||||
if (unlikely(!dma_is_in_local_node(dev, nid, dma_addr, size))) {
|
||||
dma_addr = pswiotlb_map(dev, nid, phys, size, dir, attrs);
|
||||
if (dma_addr == DMA_MAPPING_ERROR) {
|
||||
dma_addr = phys_to_dma(dev, phys);
|
||||
dev_warn_once(dev,
|
||||
"Failed to allocate memory from pswiotlb, fall back to non-local dma\n");
|
||||
} else
|
||||
return dma_addr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
|
||||
arch_sync_dma_for_device(phys, size, dir);
|
||||
return dma_addr;
|
||||
}
|
||||
|
||||
static inline void pswiotlb_dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
phys_addr_t phys = dma_to_phys(dev, addr);
|
||||
int nid = dev->numa_node;
|
||||
struct p_io_tlb_pool *pool;
|
||||
|
||||
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
|
||||
!dev_is_dma_coherent(dev)) {
|
||||
arch_sync_dma_for_cpu(phys, size, dir);
|
||||
arch_sync_dma_for_cpu_all();
|
||||
}
|
||||
|
||||
if (unlikely(is_swiotlb_buffer(phys)))
|
||||
swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
|
||||
|
||||
if (is_pswiotlb_active(dev)) {
|
||||
if (unlikely(is_pswiotlb_buffer(dev, nid, phys, &pool)))
|
||||
pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool);
|
||||
|
||||
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE))
|
||||
arch_dma_mark_clean(phys, size);
|
||||
}
|
||||
}
|
||||
#endif /* _KERNEL_PSWIOTLB_DMA_DIRECT_H */
|
|
@ -0,0 +1,929 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* DMA operations based on Phytium software IO tlb that
|
||||
* map physical memory indirectly with an IOMMU.
|
||||
*
|
||||
* Copyright (c) 2024, Phytium Technology Co., Ltd.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "pswiotlb iommu: " fmt
|
||||
|
||||
#include <linux/acpi_iort.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/dma-map-ops.h>
|
||||
#include <linux/dma-iommu.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/bits.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/huge_mm.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/iova.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/property.h>
|
||||
#include <linux/fsl/mc.h>
|
||||
#include <linux/module.h>
|
||||
#include <trace/events/iommu.h>
|
||||
#include <linux/swiotlb.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/crash_dump.h>
|
||||
#include <linux/dma-direct.h>
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/crash_dump.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include <linux/memremap.h>
|
||||
#include <linux/of_iommu.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/pswiotlb.h>
|
||||
#ifdef CONFIG_ARCH_PHYTIUM
|
||||
#include <asm/cputype.h>
|
||||
#endif
|
||||
|
||||
#include "pswiotlb-dma.h"
|
||||
|
||||
enum iommu_dma_cookie_type {
|
||||
IOMMU_DMA_IOVA_COOKIE,
|
||||
IOMMU_DMA_MSI_COOKIE,
|
||||
};
|
||||
|
||||
struct iommu_dma_cookie {
|
||||
enum iommu_dma_cookie_type type;
|
||||
union {
|
||||
/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
|
||||
struct iova_domain iovad;
|
||||
/* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
|
||||
dma_addr_t msi_iova;
|
||||
};
|
||||
struct list_head msi_page_list;
|
||||
|
||||
/* Domain for flush queue callback; NULL if flush queue not in use */
|
||||
struct iommu_domain *fq_domain;
|
||||
};
|
||||
|
||||
//static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
|
||||
|
||||
/*
|
||||
* The following functions are ported from
|
||||
* ./drivers/iommu/dma-iommu.c
|
||||
* ./drivers/iommu/iommu.c
|
||||
* static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
|
||||
* phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
|
||||
* static bool dev_is_untrusted(struct device *dev);
|
||||
* static int iommu_dma_deferred_attach(struct device *dev,
|
||||
* struct iommu_domain *domain)
|
||||
* static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
|
||||
* unsigned long attrs);
|
||||
* static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
|
||||
* size_t size, u64 dma_limit, struct device *dev);
|
||||
* static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
|
||||
* dma_addr_t iova, size_t size);
|
||||
* static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
|
||||
* size_t size);
|
||||
* static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
|
||||
* size_t size, int prot, u64 dma_mask);
|
||||
* static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
|
||||
* dma_addr_t dma_addr);
|
||||
* static void __invalidate_sg(struct scatterlist *sg, int nents);
|
||||
*/
|
||||
|
||||
static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
|
||||
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
|
||||
{
|
||||
const struct iommu_ops *ops = domain->ops;
|
||||
unsigned long orig_iova = iova;
|
||||
unsigned int min_pagesz;
|
||||
size_t orig_size = size;
|
||||
phys_addr_t orig_paddr = paddr;
|
||||
int ret = 0;
|
||||
|
||||
if (unlikely(ops->map == NULL ||
|
||||
domain->pgsize_bitmap == 0UL))
|
||||
return -ENODEV;
|
||||
|
||||
if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
|
||||
return -EINVAL;
|
||||
|
||||
/* find out the minimum page size supported */
|
||||
min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
|
||||
|
||||
/*
|
||||
* both the virtual address and the physical one, as well as
|
||||
* the size of the mapping, must be aligned (at least) to the
|
||||
* size of the smallest page supported by the hardware
|
||||
*/
|
||||
if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
|
||||
pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
|
||||
iova, &paddr, size, min_pagesz);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
|
||||
|
||||
while (size) {
|
||||
size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
|
||||
|
||||
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
|
||||
iova, &paddr, pgsize);
|
||||
ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
iova += pgsize;
|
||||
paddr += pgsize;
|
||||
size -= pgsize;
|
||||
}
|
||||
|
||||
/* unroll mapping in case something went wrong */
|
||||
if (ret)
|
||||
iommu_unmap(domain, orig_iova, orig_size - size);
|
||||
else
|
||||
trace_map(orig_iova, orig_paddr, orig_size);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t __iommu_map_sg_dma(struct device *dev, struct iommu_domain *domain,
|
||||
unsigned long iova, struct scatterlist *sg, unsigned int nents,
|
||||
int prot, gfp_t gfp, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
const struct iommu_ops *ops = domain->ops;
|
||||
size_t mapped = 0;
|
||||
int ret;
|
||||
struct iommu_dma_cookie *cookie = domain->iova_cookie;
|
||||
struct iova_domain *iovad = &cookie->iovad;
|
||||
size_t aligned_size;
|
||||
int nid = dev->numa_node;
|
||||
struct scatterlist *sg_orig = sg;
|
||||
struct scatterlist *s;
|
||||
int i;
|
||||
|
||||
for_each_sg(sg, s, nents, i) {
|
||||
phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
|
||||
|
||||
/* check whether dma addr is in local node */
|
||||
if (dir != DMA_TO_DEVICE) {
|
||||
aligned_size = s->length;
|
||||
if ((!dma_is_in_local_node(dev, nid, phys,
|
||||
aligned_size)) && (pswiotlb_force_disable != true)) {
|
||||
aligned_size = iova_align(iovad, s->length);
|
||||
phys = pswiotlb_tbl_map_single(dev, nid,
|
||||
phys, s->length, aligned_size, iova_mask(iovad), dir, attrs);
|
||||
if (phys == DMA_MAPPING_ERROR) {
|
||||
phys = page_to_phys(sg_page(s)) + s->offset;
|
||||
dev_warn_once(dev,
|
||||
"Failed to allocate memory from pswiotlb, fall back to non-local dma\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
|
||||
arch_sync_dma_for_device(phys, s->length, dir);
|
||||
|
||||
ret = __iommu_map(domain, iova + mapped, phys,
|
||||
s->length, prot, gfp);
|
||||
if (ret)
|
||||
goto out_err;
|
||||
|
||||
mapped += s->length;
|
||||
}
|
||||
|
||||
if (ops->iotlb_sync_map)
|
||||
ops->iotlb_sync_map(domain, iova, mapped);
|
||||
return mapped;
|
||||
|
||||
out_err:
|
||||
/* undo mappings already done */
|
||||
iommu_dma_unmap_sg_pswiotlb(dev, sg_orig, iova,
|
||||
mapped, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
|
||||
iommu_unmap(domain, iova, mapped);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t pswiotlb_iommu_map_sg_atomic_dma(struct device *dev,
|
||||
struct iommu_domain *domain, unsigned long iova,
|
||||
struct scatterlist *sg, unsigned int nents, int prot,
|
||||
enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
return __iommu_map_sg_dma(dev, domain, iova, sg, nents, prot, GFP_ATOMIC, dir, attrs);
|
||||
}
|
||||
static bool dev_is_untrusted(struct device *dev)
|
||||
{
|
||||
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
|
||||
}
|
||||
|
||||
static int iommu_dma_deferred_attach(struct device *dev,
|
||||
struct iommu_domain *domain)
|
||||
{
|
||||
const struct iommu_ops *ops = domain->ops;
|
||||
|
||||
if (!is_kdump_kernel())
|
||||
return 0;
|
||||
|
||||
if (unlikely(ops->is_attach_deferred &&
|
||||
ops->is_attach_deferred(domain, dev)))
|
||||
return iommu_attach_device(domain, dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
|
||||
* page flags.
|
||||
* @dir: Direction of DMA transfer
|
||||
* @coherent: Is the DMA master cache-coherent?
|
||||
* @attrs: DMA attributes for the mapping
|
||||
*
|
||||
* Return: corresponding IOMMU API page protection flags
|
||||
*/
|
||||
static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
|
||||
unsigned long attrs)
|
||||
{
|
||||
int prot = coherent ? IOMMU_CACHE : 0;
|
||||
|
||||
if (attrs & DMA_ATTR_PRIVILEGED)
|
||||
prot |= IOMMU_PRIV;
|
||||
|
||||
switch (dir) {
|
||||
case DMA_BIDIRECTIONAL:
|
||||
return prot | IOMMU_READ | IOMMU_WRITE;
|
||||
case DMA_TO_DEVICE:
|
||||
return prot | IOMMU_READ;
|
||||
case DMA_FROM_DEVICE:
|
||||
return prot | IOMMU_WRITE;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
|
||||
size_t size, u64 dma_limit, struct device *dev)
|
||||
{
|
||||
struct iommu_dma_cookie *cookie = domain->iova_cookie;
|
||||
struct iova_domain *iovad = &cookie->iovad;
|
||||
unsigned long shift, iova_len, iova = 0;
|
||||
|
||||
if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
|
||||
cookie->msi_iova += size;
|
||||
return cookie->msi_iova - size;
|
||||
}
|
||||
|
||||
shift = iova_shift(iovad);
|
||||
iova_len = size >> shift;
|
||||
/*
|
||||
* Freeing non-power-of-two-sized allocations back into the IOVA caches
|
||||
* will come back to bite us badly, so we have to waste a bit of space
|
||||
* rounding up anything cacheable to make sure that can't happen. The
|
||||
* order of the unadjusted size will still match upon freeing.
|
||||
*/
|
||||
if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
|
||||
iova_len = roundup_pow_of_two(iova_len);
|
||||
|
||||
dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
|
||||
|
||||
if (domain->geometry.force_aperture)
|
||||
dma_limit = min_t(u64, dma_limit, domain->geometry.aperture_end);
|
||||
|
||||
/* Try to get PCI devices a SAC address */
|
||||
if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
|
||||
iova = alloc_iova_fast(iovad, iova_len,
|
||||
DMA_BIT_MASK(32) >> shift, false);
|
||||
|
||||
if (!iova)
|
||||
iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
|
||||
true);
|
||||
|
||||
return (dma_addr_t)iova << shift;
|
||||
}
|
||||
|
||||
static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
|
||||
dma_addr_t iova, size_t size)
|
||||
{
|
||||
struct iova_domain *iovad = &cookie->iovad;
|
||||
|
||||
/* The MSI case is only ever cleaning up its most recent allocation */
|
||||
if (cookie->type == IOMMU_DMA_MSI_COOKIE)
|
||||
cookie->msi_iova -= size;
|
||||
else if (cookie->fq_domain) /* non-strict mode */
|
||||
queue_iova(iovad, iova_pfn(iovad, iova),
|
||||
size >> iova_shift(iovad), 0);
|
||||
else
|
||||
free_iova_fast(iovad, iova_pfn(iovad, iova),
|
||||
size >> iova_shift(iovad));
|
||||
}
|
||||
|
||||
static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
|
||||
size_t size)
|
||||
{
|
||||
struct iommu_domain *domain = iommu_get_dma_domain(dev);
|
||||
struct iommu_dma_cookie *cookie = domain->iova_cookie;
|
||||
struct iova_domain *iovad = &cookie->iovad;
|
||||
size_t iova_off = iova_offset(iovad, dma_addr);
|
||||
struct iommu_iotlb_gather iotlb_gather;
|
||||
size_t unmapped;
|
||||
|
||||
dma_addr -= iova_off;
|
||||
size = iova_align(iovad, size + iova_off);
|
||||
iommu_iotlb_gather_init(&iotlb_gather);
|
||||
|
||||
unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
|
||||
WARN_ON(unmapped != size);
|
||||
|
||||
if (!cookie->fq_domain)
|
||||
iommu_iotlb_sync(domain, &iotlb_gather);
|
||||
iommu_dma_free_iova(cookie, dma_addr, size);
|
||||
}
|
||||
|
||||
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
|
||||
size_t size, int prot, u64 dma_mask)
|
||||
{
|
||||
struct iommu_domain *domain = iommu_get_dma_domain(dev);
|
||||
struct iommu_dma_cookie *cookie = domain->iova_cookie;
|
||||
struct iova_domain *iovad = &cookie->iovad;
|
||||
size_t iova_off = iova_offset(iovad, phys);
|
||||
dma_addr_t iova;
|
||||
|
||||
if (unlikely(iommu_dma_deferred_attach(dev, domain)))
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
size = iova_align(iovad, size + iova_off);
|
||||
|
||||
iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
|
||||
if (!iova)
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
|
||||
iommu_dma_free_iova(cookie, iova, size);
|
||||
return DMA_MAPPING_ERROR;
|
||||
}
|
||||
return iova + iova_off;
|
||||
}
|
||||
|
||||
void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev,
|
||||
dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
phys_addr_t phys;
|
||||
int nid = dev->numa_node;
|
||||
struct p_io_tlb_pool *pool;
|
||||
|
||||
if (is_pswiotlb_active(dev)) {
|
||||
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_for_cpu(phys, size, dir);
|
||||
|
||||
if (is_pswiotlb_buffer(dev, nid, phys, &pool))
|
||||
pswiotlb_sync_single_for_cpu(dev, nid, phys, size, dir, pool);
|
||||
|
||||
if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
|
||||
return;
|
||||
|
||||
if (is_swiotlb_buffer(phys))
|
||||
swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
|
||||
//swiotlb_sync_single_for_cpu(dev, phys, size, dir);
|
||||
} else {
|
||||
if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
|
||||
return;
|
||||
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_for_cpu(phys, size, dir);
|
||||
if (is_swiotlb_buffer(phys))
|
||||
swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
|
||||
//swiotlb_sync_single_for_cpu(dev, phys, size, dir);
|
||||
}
|
||||
}
|
||||
|
||||
void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys;
	int nid = dev->numa_node;
	struct p_io_tlb_pool *pool;

	if (is_pswiotlb_active(dev)) {
		phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
		if (is_pswiotlb_buffer(dev, nid, phys, &pool))
			pswiotlb_sync_single_for_device(dev, nid, phys, size, dir, pool);

		if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
			return;
	} else {
		if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
			return;

		phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
	}

	if (is_swiotlb_buffer(phys))
		swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE);
	//swiotlb_sync_single_for_device(dev, phys, size, dir);

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_device(phys, size, dir);
}

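/*
 * pswiotlb_iommu_dma_sync_sg_for_cpu - scatterlist variant of the CPU sync.
 * With the pswiotlb active the DMA addresses of the segments are walked so
 * that each segment bounced through a pswiotlb pool can be copied back.
 */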
void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;
	int nid = dev->numa_node;
	dma_addr_t start_orig;
	phys_addr_t phys;
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	struct p_io_tlb_pool *pool;

	if (is_pswiotlb_active(dev)) {
		start_orig = sg_dma_address(sgl);
		for_each_sg(sgl, sg, nelems, i) {
			if (dir != DMA_TO_DEVICE) {
				unsigned int s_iova_off = iova_offset(iovad, sg->offset);

				if (i > 0)
					start_orig += s_iova_off;
				phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), start_orig);
				if (!dev_is_dma_coherent(dev))
					arch_sync_dma_for_cpu(phys, sg->length, dir);

				if (is_pswiotlb_buffer(dev, nid, phys, &pool))
					pswiotlb_sync_single_for_cpu(dev, nid, phys,
								     sg->length, dir, pool);
				start_orig -= s_iova_off;
				start_orig += iova_align(iovad, sg->length + s_iova_off);
			} else {
				if (!dev_is_dma_coherent(dev))
					arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
			}
		}
	} else {
		if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
			return;

		for_each_sg(sgl, sg, nelems, i) {
			if (!dev_is_dma_coherent(dev))
				arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);

			if (is_swiotlb_buffer(sg_phys(sg)))
				swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
							dir, SYNC_FOR_CPU);
			//swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
			//			      sg->length, dir);
		}
	}
}

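/*
 * pswiotlb_iommu_dma_sync_sg_for_device - scatterlist variant of the device
 * sync; bounced segments are copied into their pswiotlb pools before the
 * usual swiotlb sync and arch cache maintenance.
 */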
void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;
	int nid = dev->numa_node;
	struct p_io_tlb_pool *pool;

	if (is_pswiotlb_active(dev)) {
		for_each_sg(sgl, sg, nelems, i) {
			if (is_pswiotlb_buffer(dev, nid, sg_phys(sg), &pool))
				pswiotlb_sync_single_for_device(dev, nid, sg_phys(sg),
								sg->length, dir, pool);
			if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
				continue;
			if (is_swiotlb_buffer(sg_phys(sg)))
				swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
							dir, SYNC_FOR_DEVICE);
			//swiotlb_sync_single_for_device(dev, sg_phys(sg),
			//				sg->length, dir);

			if (!dev_is_dma_coherent(dev))
				arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
		}
	} else {
		if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
			return;

		for_each_sg(sgl, sg, nelems, i) {
			if (is_swiotlb_buffer(sg_phys(sg)))
				swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
							dir, SYNC_FOR_DEVICE);
			//swiotlb_sync_single_for_device(dev, sg_phys(sg),
			//				sg->length, dir);

			if (!dev_is_dma_coherent(dev))
				arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
		}
	}
}

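/*
 * pswiotlb_iommu_dma_map_page - map a single page for streaming DMA.  For
 * untrusted devices the buffer may first be bounced through the regular
 * swiotlb; on pswiotlb-capable systems, non-DMA_TO_DEVICE buffers that do
 * not live on the device's local NUMA node are additionally bounced through
 * a node-local pswiotlb pool before the IOVA mapping is created.
 */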
dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	bool coherent = dev_is_dma_coherent(dev);

	int prot = dma_info_to_prot(dir, coherent, attrs);
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	size_t aligned_size = size;
	dma_addr_t iova, dma_mask = dma_get_mask(dev);
	int nid = dev->numa_node;
	struct p_io_tlb_pool *pool;

	/*
	 * If both the physical buffer start address and size are
	 * page aligned, we don't need to use a bounce page.
	 */
	if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
	    iova_offset(iovad, phys | size)) {
		void *padding_start;
		size_t padding_size;

		aligned_size = iova_align(iovad, size);
		phys = swiotlb_tbl_map_single(dev, phys, size,
					      aligned_size, dir, attrs);

		if (phys == DMA_MAPPING_ERROR)
			return DMA_MAPPING_ERROR;

		/* Cleanup the padding area. */
		padding_start = phys_to_virt(phys);
		padding_size = aligned_size;

		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
		    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
			padding_start += size;
			padding_size -= size;
		}

		memset(padding_start, 0, padding_size);
	}

	/* check whether dma addr is in local node */
	if (is_pswiotlb_active(dev)) {
		if (dir != DMA_TO_DEVICE) {
			if (unlikely(!dma_is_in_local_node(dev, nid, phys, aligned_size))) {
				aligned_size = iova_align(iovad, size);
				phys = pswiotlb_tbl_map_single(dev, nid, phys, size,
							       aligned_size, iova_mask(iovad),
							       dir, attrs);
				if (phys == DMA_MAPPING_ERROR) {
					phys = page_to_phys(page) + offset;
					dev_warn_once(dev,
						      "Failed to allocate memory from pswiotlb, fall back to non-local dma\n");
				}
			}
		}
	}

	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(phys, size, dir);

	iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
		swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
	if (iova == DMA_MAPPING_ERROR && is_pswiotlb_buffer(dev, nid, phys, &pool))
		pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool);
	return iova;
}

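/*
 * pswiotlb_iommu_dma_unmap_page - tear down a single-page mapping and copy
 * any bounced data back out of the swiotlb or pswiotlb buffers.
 */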
void pswiotlb_iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	phys_addr_t phys;
	int nid = dev->numa_node;
	struct p_io_tlb_pool *pool;

	phys = iommu_iova_to_phys(domain, dma_handle);
	if (WARN_ON(!phys))
		return;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(phys, size, dir);

	__iommu_dma_unmap(dev, dma_handle, size);

	if (unlikely(is_swiotlb_buffer(phys)))
		swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);

	if (is_pswiotlb_active(dev) &&
	    is_pswiotlb_buffer(dev, nid, phys, &pool))
		pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool);
}

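/*
 * iommu_dma_unmap_page_sg - per-segment helper used when unmapping a
 * scatterlist: releases the pswiotlb bounce buffer backing one segment
 * without touching the IOVA mapping itself.
 */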
static void iommu_dma_unmap_page_sg(struct device *dev, dma_addr_t dma_handle,
		size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	phys_addr_t phys;
	int nid = dev->numa_node;
	struct p_io_tlb_pool *pool;

	phys = iommu_iova_to_phys(domain, dma_handle);

	if (WARN_ON(!phys))
		return;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(phys, size, dir);

	if (is_pswiotlb_buffer(dev, nid, phys, &pool))
		pswiotlb_tbl_unmap_single(dev, nid, phys, offset, size, dir, attrs, pool);
}

/*
 * Prepare a successfully-mapped scatterlist to give back to the caller.
 *
 * At this point the segments are already laid out by pswiotlb_iommu_dma_map_sg() to
 * avoid individually crossing any boundaries, so we merely need to check a
 * segment's start address to avoid concatenating across one.
 */
static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
		dma_addr_t dma_addr)
{
	struct scatterlist *s, *cur = sg;
	unsigned long seg_mask = dma_get_seg_boundary(dev);
	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
	int i, count = 0;

	for_each_sg(sg, s, nents, i) {
		/* Restore this segment's original unaligned fields first */
		unsigned int s_iova_off = sg_dma_address(s);
		unsigned int s_length = sg_dma_len(s);
		unsigned int s_iova_len = s->length;

		s->offset += s_iova_off;
		s->length = s_length;
		sg_dma_address(s) = DMA_MAPPING_ERROR;
		sg_dma_len(s) = 0;

		/*
		 * Now fill in the real DMA data. If...
		 * - there is a valid output segment to append to
		 * - and this segment starts on an IOVA page boundary
		 * - but doesn't fall at a segment boundary
		 * - and wouldn't make the resulting output segment too long
		 */
		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
		    (max_len - cur_len >= s_length)) {
			/* ...then concatenate it with the previous one */
			cur_len += s_length;
		} else {
			/* Otherwise start the next output segment */
			if (i > 0)
				cur = sg_next(cur);
			cur_len = s_length;
			count++;

			sg_dma_address(cur) = dma_addr + s_iova_off;
		}

		sg_dma_len(cur) = cur_len;
		dma_addr += s_iova_len;

		if (s_length + s_iova_off < s_iova_len)
			cur_len = 0;
	}
	return count;
}

/*
 * If mapping failed, then just restore the original list,
 * but making sure the DMA fields are invalidated.
 */
static void __invalidate_sg(struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		if (sg_dma_address(s) != DMA_MAPPING_ERROR)
			s->offset += sg_dma_address(s);
		if (sg_dma_len(s))
			s->length = sg_dma_len(s);
		sg_dma_address(s) = DMA_MAPPING_ERROR;
		sg_dma_len(s) = 0;
	}
}

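/*
 * For small scatterlists (a single entry shorter than PAGE_SIZE) the map and
 * unmap fast paths below simply reuse the single-page helpers instead of
 * building a merged IOVA range.
 */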
static void iommu_dma_unmap_sg_pswiotlb_pagesize(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i)
		pswiotlb_iommu_dma_unmap_page(dev, sg_dma_address(s),
					      sg_dma_len(s), dir, attrs);
}

void iommu_dma_unmap_sg_pswiotlb(struct device *dev, struct scatterlist *sg,
		unsigned long iova_start, size_t mapped, int nents,
		enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t start, start_orig;
	struct scatterlist *s;
	struct scatterlist *sg_orig = sg;
	int i;

	start = iova_start;
	start_orig = start;
	for_each_sg(sg_orig, s, nents, i) {
		if (!mapped || (start_orig > (start + mapped)))
			break;
		if (s->length == 0)
			break;
		iommu_dma_unmap_page_sg(dev, start_orig, 0,
					s->length, dir, attrs);
		start_orig += s->length;
	}
}

static int iommu_dma_map_sg_pswiotlb_pagesize(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		sg_dma_address(s) = pswiotlb_iommu_dma_map_page(dev, sg_page(s),
				s->offset, s->length, dir, attrs);
		if (sg_dma_address(s) == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(s) = s->length;
	}

	return nents;

out_unmap:
	iommu_dma_unmap_sg_pswiotlb_pagesize(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	return -EIO;
}

/*
 * The DMA API client is passing in a scatterlist which could describe
 * any old buffer layout, but the IOMMU API requires everything to be
 * aligned to IOMMU pages. Hence the need for this complicated bit of
 * impedance-matching, to be able to hand off a suitably-aligned list,
 * but still preserve the original offsets and sizes for the caller.
 */
int pswiotlb_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	struct scatterlist *s, *prev = NULL;
	int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
	dma_addr_t iova;
	size_t iova_len = 0;
	unsigned long mask = dma_get_seg_boundary(dev);
	ssize_t ret;
	int i;

	if (unlikely(iommu_dma_deferred_attach(dev, domain)))
		return 0;

	if (dir != DMA_TO_DEVICE && is_pswiotlb_active(dev)
	    && ((nents == 1) && (sg->length < PAGE_SIZE)))
		return iommu_dma_map_sg_pswiotlb_pagesize(dev, sg, nents, dir, attrs);

	if ((dir == DMA_TO_DEVICE) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		pswiotlb_iommu_dma_sync_sg_for_device(dev, sg, nents, dir);

	/*
	 * Work out how much IOVA space we need, and align the segments to
	 * IOVA granules for the IOMMU driver to handle. With some clever
	 * trickery we can modify the list in-place, but reversibly, by
	 * stashing the unaligned parts in the as-yet-unused DMA fields.
	 */
	for_each_sg(sg, s, nents, i) {
		size_t s_iova_off = iova_offset(iovad, s->offset);
		size_t s_length = s->length;
		size_t pad_len = (mask - iova_len + 1) & mask;

		sg_dma_address(s) = s_iova_off;
		sg_dma_len(s) = s_length;
		s->offset -= s_iova_off;
		s_length = iova_align(iovad, s_length + s_iova_off);
		s->length = s_length;

		/*
		 * Due to the alignment of our single IOVA allocation, we can
		 * depend on these assumptions about the segment boundary mask:
		 * - If mask size >= IOVA size, then the IOVA range cannot
		 *   possibly fall across a boundary, so we don't care.
		 * - If mask size < IOVA size, then the IOVA range must start
		 *   exactly on a boundary, therefore we can lay things out
		 *   based purely on segment lengths without needing to know
		 *   the actual addresses beforehand.
		 * - The mask must be a power of 2, so pad_len == 0 if
		 *   iova_len == 0, thus we cannot dereference prev the first
		 *   time through here (i.e. before it has a meaningful value).
		 */
		if (pad_len && pad_len < s_length - 1) {
			prev->length += pad_len;
			iova_len += pad_len;
		}

		iova_len += s_length;
		prev = s;
	}

	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
	if (!iova) {
		ret = -ENOMEM;
		goto out_restore_sg;
	}

	/*
	 * We'll leave any physical concatenation to the IOMMU driver's
	 * implementation - it knows better than we do.
	 */
	if (dir != DMA_TO_DEVICE && is_pswiotlb_active(dev))
		ret = pswiotlb_iommu_map_sg_atomic_dma(dev, domain,
				iova, sg, nents, prot, dir, attrs);
	else
		ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);

	if (ret < iova_len)
		goto out_free_iova;

	return __finalise_sg(dev, sg, nents, iova);

out_free_iova:
	iommu_dma_free_iova(cookie, iova, iova_len);
out_restore_sg:
	__invalidate_sg(sg, nents);
	return ret;
}

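/*
 * pswiotlb_iommu_dma_unmap_sg - undo pswiotlb_iommu_dma_map_sg().  Bounced
 * segments are released per-segment first (when the pswiotlb is active and
 * the transfer was not DMA_TO_DEVICE), then the single contiguous IOVA
 * allocation that backs the whole list is unmapped.
 */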
void pswiotlb_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t start, end, start_orig;
	struct scatterlist *tmp, *s;
	struct scatterlist *sg_orig = sg;
	int i;
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;

	if ((dir != DMA_TO_DEVICE) && ((nents == 1) && (sg->length < PAGE_SIZE))) {
		iommu_dma_unmap_sg_pswiotlb_pagesize(dev, sg, nents, dir, attrs);
		return;
	}

	if ((dir == DMA_TO_DEVICE) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		pswiotlb_iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);

	/*
	 * The scatterlist segments are mapped into a single
	 * contiguous IOVA allocation, so this is incredibly easy.
	 */
	start = sg_dma_address(sg);

	if (is_pswiotlb_active(dev)) {
		/* check whether dma addr is in local node */
		start_orig = start;
		if (dir != DMA_TO_DEVICE) {
			for_each_sg(sg_orig, s, nents, i) {
				unsigned int s_iova_off = iova_offset(iovad, s->offset);

				if (i > 0)
					start_orig += s_iova_off;
				iommu_dma_unmap_page_sg(dev, start_orig,
							s_iova_off, s->length,
							dir, attrs);
				start_orig -= s_iova_off;
				start_orig += iova_align(iovad, s->length + s_iova_off);
			}
		}
	}

	for_each_sg(sg_next(sg), tmp, nents - 1, i) {
		if (sg_dma_len(tmp) == 0)
			break;
		sg = tmp;
	}

	end = sg_dma_address(sg) + sg_dma_len(sg);
	__iommu_dma_unmap(dev, start, end - start);
}

@ -0,0 +1,153 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Auxiliary DMA operations used by arch-independent dma-mapping
 * routines when Phytium software IO tlb is required.
 *
 * Copyright (c) 2024, Phytium Technology Co., Ltd.
 */
#include <linux/memblock.h> /* for max_pfn */
#include <linux/acpi.h>
#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/of_device.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "../debug.h"
#include "../direct.h"
#include "pswiotlb-dma.h"

/*
 * The following functions are ported from
 * kernel/dma/mapping.c:
 * static bool dma_go_direct(struct device *dev, dma_addr_t mask,
 *		const struct dma_map_ops *ops);
 * static inline bool dma_map_direct(struct device *dev,
 *		const struct dma_map_ops *ops);
 */

static bool dma_go_direct(struct device *dev, dma_addr_t mask,
		const struct dma_map_ops *ops)
{
	if (likely(!ops))
		return true;
#ifdef CONFIG_DMA_OPS_BYPASS
	if (dev->dma_ops_bypass)
		return min_not_zero(mask, dev->bus_dma_limit) >=
			dma_direct_get_required_mask(dev);
#endif
	return false;
}

static inline bool dma_map_direct(struct device *dev,
		const struct dma_map_ops *ops)
{
	return dma_go_direct(dev, *dev->dma_mask, ops);
}

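/*
 * The *_distribute helpers below mirror the generic dma_map_* entry points:
 * each operation is dispatched either to the pswiotlb-aware direct-mapping
 * path or to the pswiotlb-aware IOMMU path, depending on dma_map_direct().
 */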
dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *page,
		size_t offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);
	dma_addr_t addr;

	if (dma_map_direct(dev, ops))
		addr = pswiotlb_dma_direct_map_page(dev, page, offset, size, dir, attrs);
	else
		addr = pswiotlb_iommu_dma_map_page(dev, page, offset, size, dir, attrs);
	debug_dma_map_page(dev, page, offset, size, dir, addr);

	return addr;
}

void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir, unsigned long attrs)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (dma_map_direct(dev, ops))
		pswiotlb_dma_direct_unmap_page(dev, addr, size, dir, attrs);
	else if (ops->unmap_page)
		pswiotlb_iommu_dma_unmap_page(dev, addr, size, dir, attrs);
	debug_dma_unmap_page(dev, addr, size, dir);
}

int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);
	int ents;

	if (dma_map_direct(dev, ops))
		ents = pswiotlb_dma_direct_map_sg(dev, sg, nents, dir, attrs);
	else
		ents = pswiotlb_iommu_dma_map_sg(dev, sg, nents, dir, attrs);

	if (ents > 0)
		debug_dma_map_sg(dev, sg, nents, ents, dir);
	else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM &&
			      ents != -EIO))
		return -EIO;

	return ents;
}

void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir,
		unsigned long attrs)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (dma_map_direct(dev, ops))
		pswiotlb_dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
	else if (ops->unmap_sg)
		pswiotlb_iommu_dma_unmap_sg(dev, sg, nents, dir, attrs);
}

void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (dma_map_direct(dev, ops))
		pswiotlb_dma_direct_sync_single_for_cpu(dev, addr, size, dir);
	else if (ops->sync_single_for_cpu)
		pswiotlb_iommu_dma_sync_single_for_cpu(dev, addr, size, dir);
	debug_dma_sync_single_for_cpu(dev, addr, size, dir);
}

void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (dma_map_direct(dev, ops))
		pswiotlb_dma_direct_sync_single_for_device(dev, addr, size, dir);
	else if (ops->sync_single_for_device)
		pswiotlb_iommu_dma_sync_single_for_device(dev, addr, size, dir);
	debug_dma_sync_single_for_device(dev, addr, size, dir);
}

void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, struct scatterlist *sg,
		int nelems, enum dma_data_direction dir)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (dma_map_direct(dev, ops))
		pswiotlb_dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir);
	else if (ops->sync_sg_for_cpu)
		pswiotlb_iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
	debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
}

void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, struct scatterlist *sg,
		int nelems, enum dma_data_direction dir)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (dma_map_direct(dev, ops))
		pswiotlb_dma_direct_sync_sg_for_device(dev, sg, nelems, dir);
	else if (ops->sync_sg_for_device)
		pswiotlb_iommu_dma_sync_sg_for_device(dev, sg, nelems, dir);
	debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
}