[dpdk-dev] [PATCH 2/3] bus/fslmc: optimize physical to virtual address searching

Shreyansh Jain shreyansh.jain at nxp.com
Fri Apr 27 18:25:55 CEST 2018


With Hotplugging memory support, the order of memseg has been changed
from physically contiguous to virtual contiguous. FSLMC bus and dpaa2
drivers depend on PA to VA address conversion when in Physical
addressing mode.

This patch creates a list of blocks requested to be pinned to the
DPAA2 mempool. For searching physical addresses, it is expected that
it would belong to this list (from hardware pool) and hence it is
less expensive than memseg walks. Though, this has marginal impact on
performance vis-a-vis legacy mode with physically contiguous memsegs.

Signed-off-by: Shreyansh Jain <shreyansh.jain at nxp.com>

--

An optimized algorithm is being worked upon based on some recent patches
in hotplugging. That would improve/recover the performance. Until that
time, this patch is to be treated a stop-gap solution.
---
 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h  | 23 +++++++++++++++++
 drivers/mempool/dpaa2/dpaa2_hw_mempool.c | 43 ++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index c76393d45..da6e639dc 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -254,15 +254,38 @@ enum qbman_fd_format {
  */
 #define DPAA2_EQ_RESP_ALWAYS		1
 
+/* Various structures representing contiguous memory maps */
+struct dpaa2_memseg {
+	TAILQ_ENTRY(dpaa2_memseg) next;
+	char *vaddr;
+	rte_iova_t iova;
+	size_t len;
+};
+
+TAILQ_HEAD(dpaa2_memseg_list, dpaa2_memseg);
+extern struct dpaa2_memseg_list dpaa2_memsegs;
+
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 extern uint8_t dpaa2_virt_mode;
 static void *dpaa2_mem_ptov(phys_addr_t paddr) __attribute__((unused));
 /* todo - this is costly, need to write a fast coversion routine */
 static void *dpaa2_mem_ptov(phys_addr_t paddr)
 {
+	struct dpaa2_memseg *ms;
+
 	if (dpaa2_virt_mode)
 		return (void *)(size_t)paddr;
 
+	/* Check if the address is already part of the memseg list internally
+	 * maintained by the dpaa2 driver.
+	 */
+	TAILQ_FOREACH(ms, &dpaa2_memsegs, next) {
+		if (paddr >= ms->iova && paddr <
+			ms->iova + ms->len)
+			return RTE_PTR_ADD(ms->vaddr, (paddr - ms->iova));
+	}
+
+	/* If not, Fallback to full memseg list searching */
 	return rte_mem_iova2virt(paddr);
 }
 
diff --git a/drivers/mempool/dpaa2/dpaa2_hw_mempool.c b/drivers/mempool/dpaa2/dpaa2_hw_mempool.c
index ce7a4c577..4c44c33cc 100644
--- a/drivers/mempool/dpaa2/dpaa2_hw_mempool.c
+++ b/drivers/mempool/dpaa2/dpaa2_hw_mempool.c
@@ -32,6 +32,13 @@
 struct dpaa2_bp_info rte_dpaa2_bpid_info[MAX_BPID];
 static struct dpaa2_bp_list *h_bp_list;
 
+/* List of all the memseg information locally maintained in dpaa2 driver. This
+ * is to optimize the PA_to_VA searches until a better mechanism (algo) is
+ * available.
+ */
+struct dpaa2_memseg_list dpaa2_memsegs
+	= TAILQ_HEAD_INITIALIZER(dpaa2_memsegs);
+
 /* Dynamic logging identified for mempool */
 int dpaa2_logtype_mempool;
 
@@ -358,6 +365,41 @@ rte_hw_mbuf_get_count(const struct rte_mempool *mp)
 	return num_of_bufs;
 }
 
+static int
+dpaa2_populate(struct rte_mempool *mp, unsigned int max_objs,
+	      void *vaddr, rte_iova_t paddr, size_t len,
+	      rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
+{
+	struct dpaa2_memseg *ms;
+
+	/* For each memory chunk pinned to the Mempool, a linked list of the
+	 * represeted memsegs is created for searching when PA to VA
+	 * conversion is required.
+	 */
+	ms = rte_zmalloc(NULL, sizeof(struct dpaa2_memseg), 0);
+	if (!ms) {
+		DPAA2_MEMPOOL_ERR("Unable to allocate internal memory.");
+		DPAA2_MEMPOOL_WARN("Fast Physical to Virtual Addr translation would not be available.");
+		/* If the element is not added, it would only lead to failure
+		 * in searching for the element and the logic would Fallback
+		 * to traditional DPDK memseg traversal code. So, this is not
+		 * a blocking error - but, error would be printed on screen.
+		 */
+		return 0;
+	}
+
+	ms->vaddr = vaddr;
+	ms->iova = paddr;
+	ms->len = len;
+	/* Head insertions are generally faster than tail insertions as the
+	 * buffers pinned are picked from rear end.
+	 */
+	TAILQ_INSERT_HEAD(&dpaa2_memsegs, ms, next);
+
+	return rte_mempool_op_populate_default(mp, max_objs, vaddr, paddr, len,
+					       obj_cb, obj_cb_arg);
+}
+
 struct rte_mempool_ops dpaa2_mpool_ops = {
 	.name = DPAA2_MEMPOOL_OPS_NAME,
 	.alloc = rte_hw_mbuf_create_pool,
@@ -365,6 +407,7 @@ struct rte_mempool_ops dpaa2_mpool_ops = {
 	.enqueue = rte_hw_mbuf_free_bulk,
 	.dequeue = rte_dpaa2_mbuf_alloc_bulk,
 	.get_count = rte_hw_mbuf_get_count,
+	.populate = dpaa2_populate,
 };
 
 MEMPOOL_REGISTER_OPS(dpaa2_mpool_ops);
-- 
2.14.1



More information about the dev mailing list