[dpdk-dev] [PATCH v3] eal: sPAPR IOMMU support in pci probing for vfio-pci in ppc64le

Chao Zhu chaozhu at linux.vnet.ibm.com
Tue Mar 7 13:03:04 CET 2017


-----Original Message-----
From: Gowrishankar [mailto:gowrishankar.m at linux.vnet.ibm.com] 
Sent: 2017年3月6日 23:04
To: dev at dpdk.org
Cc: Chao Zhu <chaozhu at linux.vnet.ibm.com>; Anatoly Burakov
<anatoly.burakov at intel.com>; Thomas Monjalon <thomas.monjalon at 6wind.com>;
Gowrishankar Muthukrishnan <gowrishankar.m at linux.vnet.ibm.com>
Subject: [PATCH v3] eal: sPAPR IOMMU support in pci probing for vfio-pci in
ppc64le

From: Gowrishankar Muthukrishnan <gowrishankar.m at linux.vnet.ibm.com>

Below changes adds pci probing support for vfio-pci devices in power8.

v3 - better validation for kernel not implementing few iocts called
v2 - kernel version checked and doc updated

Signed-off-by: Gowrishankar Muthukrishnan
<gowrishankar.m at linux.vnet.ibm.com>
---
 doc/guides/rel_notes/release_17_05.rst |  4 ++
lib/librte_eal/linuxapp/eal/eal_vfio.c | 90
++++++++++++++++++++++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_vfio.h | 25 ++++++++++
 3 files changed, 119 insertions(+)

diff --git a/doc/guides/rel_notes/release_17_05.rst
b/doc/guides/rel_notes/release_17_05.rst
index e25ea9f..4b90036 100644
--- a/doc/guides/rel_notes/release_17_05.rst
+++ b/doc/guides/rel_notes/release_17_05.rst
@@ -42,6 +42,10 @@ New Features
      =========================================================


+* **Added powerpc support in pci probing for vfio-pci devices.**
+
+  sPAPR IOMMU based pci probing enabled for vfio-pci devices.
+
 Resolved Issues
 ---------------

diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c
b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 702f7a2..9377a66 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -50,12 +50,15 @@
 static struct vfio_config vfio_cfg;

 static int vfio_type1_dma_map(int);
+static int vfio_spapr_dma_map(int);
 static int vfio_noiommu_dma_map(int);

 /* IOMMU types we support */
 static const struct vfio_iommu_type iommu_types[] = {
 	/* x86 IOMMU, otherwise known as type 1 */
 	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+	/* ppc64 IOMMU, otherwise known as spapr */
+	{ RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
 	/* IOMMU-less mode */
 	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},  }; @@ -540,
6 +543,93 @@ int vfio_setup_device(const char *sysfs_base, const char
*dev_addr,  }

 static int
+vfio_spapr_dma_map(int vfio_container_fd) {
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	int i, ret;
+
+	struct vfio_iommu_spapr_register_memory reg = {
+		.argsz = sizeof(reg),
+		.flags = 0
+	};
+	struct vfio_iommu_spapr_tce_info info = {
+		.argsz = sizeof(info),
+	};
+	struct vfio_iommu_spapr_tce_create create = {
+		.argsz = sizeof(create),
+	};
+	struct vfio_iommu_spapr_tce_remove remove = {
+		.argsz = sizeof(remove),
+	};
+
+	/* query spapr iommu info */
+	ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO,
&info);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "  cannot get iommu info, "
+				"error %i (%s)\n", errno, strerror(errno));
+		return -1;
+	}
+
+	/* remove default DMA of 32 bit window */
+	remove.start_addr = info.dma32_window_start;
+	ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE,
&remove);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "  cannot remove default DMA window, "
+				"error %i (%s)\n", errno, strerror(errno));
+		return -1;
+	}
+
+	/* calculate window size based on number of hugepages configured */
+	create.window_size = rte_eal_get_physmem_size();
+	create.page_shift = __builtin_ctzll(ms->hugepage_sz);
+	create.levels = 2;
+
+	ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE,
&create);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "  cannot create new DMA window, "
+				"error %i (%s)\n", errno, strerror(errno));
+		return -1;
+	}
+
+	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		struct vfio_iommu_type1_dma_map dma_map;
+
+		if (ms[i].addr == NULL)
+			break;
+
+		reg.vaddr = (uintptr_t) ms[i].addr;
+		reg.size = ms[i].len;
+		ret = ioctl(vfio_container_fd,
+			VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot register vaddr for
IOMMU, "
+				"error %i (%s)\n", errno, strerror(errno));
+			return -1;
+		}
+
+		memset(&dma_map, 0, sizeof(dma_map));
+		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+		dma_map.vaddr = ms[i].addr_64;
+		dma_map.size = ms[i].len;
+		dma_map.iova = ms[i].phys_addr;
+		dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+				 VFIO_DMA_MAP_FLAG_WRITE;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA,
&dma_map);
+
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot set up DMA remapping, "
+				"error %i (%s)\n", errno, strerror(errno));
+			return -1;
+		}
+
+	}
+
+	return 0;
+}
+
+static int
 vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)  {
 	/* No-IOMMU mode does not need DMA mapping */ diff --git
a/lib/librte_eal/linuxapp/eal/eal_vfio.h
b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 29f7f3e..ac31a4f 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -54,6 +54,31 @@

 #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU

+#ifndef VFIO_SPAPR_TCE_v2_IOMMU
+#define RTE_VFIO_SPAPR 7
+#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) 
+#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) 
+#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) 
+struct vfio_iommu_spapr_register_memory {
+	uint32_t argsz;
+	uint32_t flags;
+	uint64_t vaddr;
+	uint64_t size;
+};
+struct vfio_iommu_spapr_tce_create {
+	uint32_t argsz;
+	uint32_t page_shift;
+	uint64_t window_size;
+	uint32_t levels;
+};
+struct vfio_iommu_spapr_tce_remove {
+	uint32_t argsz;
+	uint64_t start_addr;
+};
+#else
+#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU #endif
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)  #define RTE_VFIO_NOIOMMU
8  #else
--
1.9.1
Acked-by: Chao Zhu <chaozhu at linux.vnet.ibm.com>



More information about the dev mailing list