[dpdk-dev] [PATCH] bus/fslmc: support for hotplugging of memory

Shreyansh Jain shreyansh.jain at nxp.com
Thu Apr 5 16:14:14 CEST 2018


Restructure VFIO DMA code for handling hotplug memory events
(callbacks) and --legacy case.

Signed-off-by: Shreyansh Jain <shreyansh.jain at nxp.com>
---

###
This is based on the 16fbfef04a3 github repository. This is assuming
that changes already exists as done in patch 26/68.
Though, this can be a standalone, replacing 26/88. Though, the Makefile
changes don't exist in this.
Also, this just a first draft. I will push any review changes after this
incrementally over v4.
###

 drivers/bus/fslmc/fslmc_bus.c    |  15 ++++
 drivers/bus/fslmc/fslmc_vfio.c   | 161 +++++++++++++++++++++++++++++++++++----
 drivers/bus/fslmc/fslmc_vfio.h   |   1 +
 drivers/net/dpaa2/dpaa2_ethdev.c |   1 -
 4 files changed, 163 insertions(+), 15 deletions(-)

diff --git a/drivers/bus/fslmc/fslmc_bus.c b/drivers/bus/fslmc/fslmc_bus.c
index 5ee0beb85..50884ff3a 100644
--- a/drivers/bus/fslmc/fslmc_bus.c
+++ b/drivers/bus/fslmc/fslmc_bus.c
@@ -266,6 +266,21 @@ rte_fslmc_probe(void)
 		return 0;
 	}
 
+	if (rte_log_get_global_level() >= RTE_LOG_DEBUG)
+		rte_dump_physmem_layout(stdout);
+
+	/* Map existing segments as well as, in case of hotpluggable memory,
+	 * install callback handler.
+	 */
+	ret = rte_fslmc_vfio_dmamap();
+	if (ret) {
+		FSLMC_BUS_LOG(ERR, "Unable to DMA map existing VAs: (%d)",
+			      ret);
+		/* Not continuing ahead */
+		FSLMC_BUS_LOG(ERR, "FSLMC VFIO Mapping failed");
+		return 0;
+	}
+
 	ret = fslmc_vfio_process_group();
 	if (ret) {
 		FSLMC_BUS_LOG(ERR, "Unable to setup devices %d", ret);
diff --git a/drivers/bus/fslmc/fslmc_vfio.c b/drivers/bus/fslmc/fslmc_vfio.c
index 31831e3ce..60725fb70 100644
--- a/drivers/bus/fslmc/fslmc_vfio.c
+++ b/drivers/bus/fslmc/fslmc_vfio.c
@@ -30,6 +30,7 @@
 #include <rte_kvargs.h>
 #include <rte_dev.h>
 #include <rte_bus.h>
+#include <rte_eal_memconfig.h>
 
 #include "rte_fslmc.h"
 #include "fslmc_vfio.h"
@@ -193,11 +194,68 @@ static int vfio_map_irq_region(struct fslmc_vfio_group *group)
 	return -errno;
 }
 
+static int fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
+static int fslmc_unmap_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
+
+static void
+fslmc_memevent_cb(enum rte_mem_event type, const void *addr, size_t len)
+{
+	struct rte_memseg_list *msl;
+	struct rte_memseg *ms;
+	size_t cur_len = 0, map_len = 0;
+	uint64_t pgsz, virt_addr;
+	rte_iova_t iova_addr;
+	int ret;
+
+	msl = rte_mem_virt2memseg_list(addr);
+	pgsz = msl->page_sz;
+
+	while (cur_len < len) {
+		const void *va = RTE_PTR_ADD(addr, cur_len);
+
+		ms = rte_mem_virt2memseg(va, msl);
+		iova_addr = rte_mem_virt2iova(va);
+		virt_addr = ms->addr_64;
+
+		map_len = len - map_len;
+		/* Can only work with max pg_sz */
+		if (map_len > pgsz)
+			map_len = pgsz;
+
+		FSLMC_VFIO_LOG(DEBUG, "Calling with type=%d, va=%p,"
+			" virt_addr=%lX, iova=%lu, map_len=%lu\n",
+			type, va, virt_addr, iova_addr, map_len);
+
+		if (type == RTE_MEM_EVENT_ALLOC)
+			ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
+		else
+			ret = fslmc_unmap_dma(virt_addr, iova_addr, map_len);
+
+		if (ret != 0) {
+			FSLMC_VFIO_LOG(ERR, "DMA Mapping/Unmapping failed. "
+				       "Map=%d, addr=%p, len=%lu, err:(%d)",
+				       type, va, map_len, ret);
+			return;
+		}
+
+		cur_len += map_len;
+	}
+
+	if (type == RTE_MEM_EVENT_ALLOC)
+		FSLMC_VFIO_LOG(DEBUG, "Total Mapped: addr=%p, len=%lu\n",
+			       addr, len);
+	else
+		FSLMC_VFIO_LOG(DEBUG, "Total Unmapped: addr=%p, len=%lu\n",
+			       addr, len);
+}
+
 static int
-fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
-		const struct rte_memseg *ms, void *arg)
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len)
+#else
+fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr __rte_unused, size_t len)
+#endif
 {
-	int *n_segs = arg;
 	struct fslmc_vfio_group *group;
 	struct vfio_iommu_type1_dma_map dma_map = {
 		.argsz = sizeof(struct vfio_iommu_type1_dma_map),
@@ -205,10 +263,11 @@ fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
 	};
 	int ret;
 
-	dma_map.size = ms->len;
-	dma_map.vaddr = ms->addr_64;
+	dma_map.size = len;
+	dma_map.vaddr = vaddr;
+
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-	dma_map.iova = ms->iova;
+	dma_map.iova = iovaddr;
 #else
 	dma_map.iova = dma_map.vaddr;
 #endif
@@ -221,33 +280,102 @@ fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
 		return -1;
 	}
 
-	FSLMC_VFIO_LOG(DEBUG, "-->Initial SHM Virtual ADDR %llX",
-		     dma_map.vaddr);
-	FSLMC_VFIO_LOG(DEBUG, "-----> DMA size 0x%llX", dma_map.size);
-	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA,
-			&dma_map);
+	FSLMC_VFIO_LOG(DEBUG, "--> Map address: %llX, size: 0x%llX\n",
+		       dma_map.vaddr, dma_map.size);
+	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA, &dma_map);
 	if (ret) {
 		FSLMC_VFIO_LOG(ERR, "VFIO_IOMMU_MAP_DMA API(errno = %d)",
 				errno);
 		return -1;
 	}
-	(*n_segs)++;
+
 	return 0;
 }
 
+static int
+fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr __rte_unused, size_t len)
+{
+	struct fslmc_vfio_group *group;
+	struct vfio_iommu_type1_dma_unmap dma_unmap = {
+		.argsz = sizeof(struct vfio_iommu_type1_dma_unmap),
+		.flags = 0,
+	};
+	int ret;
+
+	dma_unmap.size = len;
+	dma_unmap.iova = vaddr;
+
+	/* SET DMA MAP for IOMMU */
+	group = &vfio_group;
+
+	if (!group->container) {
+		FSLMC_VFIO_LOG(ERR, "Container is not connected ");
+		return -1;
+	}
+
+	FSLMC_VFIO_LOG(DEBUG, "--> Unmap address: %llX, size: 0x%llX\n",
+		       dma_unmap.iova, dma_unmap.size);
+	ret = ioctl(group->container->fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+	if (ret) {
+		FSLMC_VFIO_LOG(ERR, "VFIO_IOMMU_UNMAP_DMA API(errno = %d)",
+			       errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
+		 const struct rte_memseg *ms, void *arg)
+{
+	int *n_segs = arg;
+	int ret;
+
+	ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
+	if (ret)
+		FSLMC_VFIO_LOG(ERR, "Unable to VFIO map (addr=%p, len=%lu)\n",
+			       ms->addr, ms->len);
+	else
+		(*n_segs)++;
+
+	return ret;
+}
+
 int rte_fslmc_vfio_dmamap(void)
 {
-	int i = 0;
+	int i = 0, ret;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
 
 	if (is_dma_done)
 		return 0;
 
-	if (rte_memseg_walk(fslmc_vfio_map, &i) < 0)
+	/* Lock before parsing and registering callback to memory subsystem */
+	rte_rwlock_read_lock(mem_lock);
+
+	if (rte_memseg_walk(fslmc_dmamap_seg, &i) < 0) {
+		rte_rwlock_read_lock(mem_lock);
 		return -1;
+	}
+
+	ret = rte_mem_event_callback_register("fslmc_memevent_clb",
+					      fslmc_memevent_cb);
+	if (ret)
+		/* Cannot install callback handler - thus, DMA Mapping cannot
+		 * be done. But, this is possible for --legacy-mem option
+		 */
+		FSLMC_VFIO_LOG(DEBUG, "Unable to install memory handler");
 
+	FSLMC_VFIO_LOG(DEBUG, "Installed memory callback handler");
 	/* Verifying that at least single segment is available */
 	if (i <= 0) {
+		/* TODO: Is this verification required any more? What would
+		 * happen to non-legacy case where nothing was preallocated
+		 * thus causing i==0?
+		 */
 		FSLMC_VFIO_LOG(ERR, "No Segments found for VFIO Mapping");
+		rte_rwlock_read_unlock(mem_lock);
 		return -1;
 	}
 	FSLMC_VFIO_LOG(DEBUG, "Total %d segments found.", i);
@@ -258,6 +386,11 @@ int rte_fslmc_vfio_dmamap(void)
 	 */
 	vfio_map_irq_region(&vfio_group);
 
+	/* Existing segments have been mapped and memory callback for hotplug
+	 * has been installed.
+	 */
+	rte_rwlock_read_unlock(mem_lock);
+
 	is_dma_done = 1;
 
 	return 0;
diff --git a/drivers/bus/fslmc/fslmc_vfio.h b/drivers/bus/fslmc/fslmc_vfio.h
index e8fb3445f..e77e4c4ac 100644
--- a/drivers/bus/fslmc/fslmc_vfio.h
+++ b/drivers/bus/fslmc/fslmc_vfio.h
@@ -9,6 +9,7 @@
 #define _FSLMC_VFIO_H_
 
 #include <rte_vfio.h>
+#include <rte_memory.h>
 
 #include "eal_vfio.h"
 
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index 2fb7b2da7..962b90035 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -1850,7 +1850,6 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev)
 
 	eth_dev->rx_pkt_burst = dpaa2_dev_prefetch_rx;
 	eth_dev->tx_pkt_burst = dpaa2_dev_tx;
-	rte_fslmc_vfio_dmamap();
 
 	RTE_LOG(INFO, PMD, "%s: netdev created\n", eth_dev->data->name);
 	return 0;
-- 
2.14.1



More information about the dev mailing list