[dpdk-dev] [PATCH v2 3/4] net/mlx4: remove Memory Region support

Yongseok Koh yskoh at mellanox.com
Wed May 9 13:09:05 CEST 2018


This patch removes the current Memory Region (MR) support in order to
accommodate the dynamic memory hotplug patch. With only this patch applied,
the driver still compiles, but traffic cannot flow and the HW will raise
faults because the Rx and Tx lkeys are stubbed to UINT32_MAX. Subsequent
patches in this series will add the new MR support.

Signed-off-by: Yongseok Koh <yskoh at mellanox.com>
---
 config/common_base           |   1 -
 doc/guides/nics/mlx4.rst     |   8 --
 drivers/net/mlx4/Makefile    |   4 -
 drivers/net/mlx4/mlx4.h      |  33 -------
 drivers/net/mlx4/mlx4_mr.c   | 222 -------------------------------------------
 drivers/net/mlx4/mlx4_rxq.c  |  11 +--
 drivers/net/mlx4/mlx4_rxtx.h |  34 +------
 drivers/net/mlx4/mlx4_txq.c  |  66 -------------
 8 files changed, 4 insertions(+), 375 deletions(-)

diff --git a/config/common_base b/config/common_base
index d525d9443..d55816e23 100644
--- a/config/common_base
+++ b/config/common_base
@@ -288,7 +288,6 @@ CONFIG_RTE_LIBRTE_AVF_16BYTE_RX_DESC=n
 CONFIG_RTE_LIBRTE_MLX4_PMD=n
 CONFIG_RTE_LIBRTE_MLX4_DEBUG=n
 CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS=n
-CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8
 
 #
 # Compile burst-oriented Mellanox ConnectX-4 & ConnectX-5 (MLX5) PMD
diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index 9564f890a..25d339d94 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -85,14 +85,6 @@ These options can be modified in the ``.config`` file.
   adds additional run-time checks and debugging messages at the cost of
   lower performance.
 
-- ``CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE`` (default **8**)
-
-  Maximum number of cached memory pools (MPs) per TX queue. Each MP from
-  which buffers are to be transmitted must be associated to memory regions
-  (MRs). This is a slow operation that must be cached.
-
-  This value is always 1 for RX queues since they use a single MP.
-
 Environment variables
 ~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index ac5b67f10..73f9d4056 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -69,10 +69,6 @@ else
 CFLAGS += -DNDEBUG -UPEDANTIC
 endif
 
-ifdef CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE
-CFLAGS += -DMLX4_PMD_TX_MP_CACHE=$(CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE)
-endif
-
 include $(RTE_SDK)/mk/rte.lib.mk
 
 # Generate and clean-up mlx4_autoconf.h.
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 415b7d40f..e0e1b5d4c 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -23,7 +23,6 @@
 #include <rte_ether.h>
 #include <rte_interrupts.h>
 #include <rte_mempool.h>
-#include <rte_spinlock.h>
 
 #ifndef IBV_RX_HASH_INNER
 /** This is not necessarily defined by supported RDMA core versions. */
@@ -42,17 +41,6 @@
 /** Fixed RSS hash key size in bytes. Cannot be modified. */
 #define MLX4_RSS_HASH_KEY_SIZE 40
 
-/**
- * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP
- * from which buffers are to be transmitted will have to be mapped by this
- * driver to their own Memory Region (MR). This is a slow operation.
- *
- * This value is always 1 for RX queues.
- */
-#ifndef MLX4_PMD_TX_MP_CACHE
-#define MLX4_PMD_TX_MP_CACHE 8
-#endif
-
 /** Interrupt alarm timeout value in microseconds. */
 #define MLX4_INTR_ALARM_TIMEOUT 100000
 
@@ -78,18 +66,6 @@ struct rxq;
 struct txq;
 struct rte_flow;
 
-/** Memory region descriptor. */
-struct mlx4_mr {
-	LIST_ENTRY(mlx4_mr) next; /**< Next entry in list. */
-	uintptr_t start; /**< Base address for memory region. */
-	uintptr_t end; /**< End address for memory region. */
-	uint32_t lkey; /**< L_Key extracted from @p mr. */
-	uint32_t refcnt; /**< Reference count for this object. */
-	struct priv *priv; /**< Back pointer to private data. */
-	struct ibv_mr *mr; /**< Memory region associated with @p mp. */
-	struct rte_mempool *mp; /**< Target memory pool (mempool). */
-};
-
 /** Private data structure. */
 struct priv {
 	struct rte_eth_dev *dev; /**< Ethernet device. */
@@ -112,8 +88,6 @@ struct priv {
 	struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */
 	LIST_HEAD(, mlx4_rss) rss; /**< Shared targets for Rx flow rules. */
 	LIST_HEAD(, rte_flow) flows; /**< Configured flow rule handles. */
-	LIST_HEAD(, mlx4_mr) mr; /**< Registered memory regions. */
-	rte_spinlock_t mr_lock; /**< Lock for @p mr access. */
 	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
 	/**< Configured MAC addresses. Unused entries are zeroed. */
 };
@@ -156,11 +130,4 @@ void mlx4_rxq_intr_disable(struct priv *priv);
 int mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx);
 int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
 
-/* mlx4_mr.c */
-
-struct mlx4_mr *mlx4_mr_get(struct priv *priv, struct rte_mempool *mp);
-void mlx4_mr_put(struct mlx4_mr *mr);
-uint32_t mlx4_txq_add_mr(struct txq *txq, struct rte_mempool *mp,
-			 uint32_t i);
-
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index 8d5a6741d..3c87f6849 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -30,230 +30,8 @@
 #include <rte_malloc.h>
 #include <rte_memory.h>
 #include <rte_mempool.h>
-#include <rte_spinlock.h>
 
 #include "mlx4_glue.h"
 #include "mlx4_rxtx.h"
 #include "mlx4_utils.h"
 
-struct mlx4_check_mempool_data {
-	int ret;
-	char *start;
-	char *end;
-};
-
-/**
- * Called by mlx4_check_mempool() when iterating the memory chunks.
- *
- * @param[in] mp
- *   Pointer to memory pool (unused).
- * @param[in, out] data
- *   Pointer to shared buffer with mlx4_check_mempool().
- * @param[in] memhdr
- *   Pointer to mempool chunk header.
- * @param mem_idx
- *   Mempool element index (unused).
- */
-static void
-mlx4_check_mempool_cb(struct rte_mempool *mp, void *opaque,
-		      struct rte_mempool_memhdr *memhdr,
-		      unsigned int mem_idx)
-{
-	struct mlx4_check_mempool_data *data = opaque;
-
-	(void)mp;
-	(void)mem_idx;
-	/* It already failed, skip the next chunks. */
-	if (data->ret != 0)
-		return;
-	/* It is the first chunk. */
-	if (data->start == NULL && data->end == NULL) {
-		data->start = memhdr->addr;
-		data->end = data->start + memhdr->len;
-		return;
-	}
-	if (data->end == memhdr->addr) {
-		data->end += memhdr->len;
-		return;
-	}
-	if (data->start == (char *)memhdr->addr + memhdr->len) {
-		data->start -= memhdr->len;
-		return;
-	}
-	/* Error, mempool is not virtually contiguous. */
-	data->ret = -1;
-}
-
-/**
- * Check if a mempool can be used: it must be virtually contiguous.
- *
- * @param[in] mp
- *   Pointer to memory pool.
- * @param[out] start
- *   Pointer to the start address of the mempool virtual memory area.
- * @param[out] end
- *   Pointer to the end address of the mempool virtual memory area.
- *
- * @return
- *   0 on success (mempool is virtually contiguous), -1 on error.
- */
-static int
-mlx4_check_mempool(struct rte_mempool *mp, uintptr_t *start, uintptr_t *end)
-{
-	struct mlx4_check_mempool_data data;
-
-	memset(&data, 0, sizeof(data));
-	rte_mempool_mem_iter(mp, mlx4_check_mempool_cb, &data);
-	*start = (uintptr_t)data.start;
-	*end = (uintptr_t)data.end;
-	return data.ret;
-}
-
-/**
- * Obtain a memory region from a memory pool.
- *
- * If a matching memory region already exists, it is returned with its
- * reference count incremented, otherwise a new one is registered.
- *
- * @param priv
- *   Pointer to private structure.
- * @param mp
- *   Pointer to memory pool.
- *
- * @return
- *   Memory region pointer, NULL in case of error and rte_errno is set.
- */
-struct mlx4_mr *
-mlx4_mr_get(struct priv *priv, struct rte_mempool *mp)
-{
-	const struct rte_memseg *ms;
-	uintptr_t start;
-	uintptr_t end;
-	struct mlx4_mr *mr;
-
-	if (mlx4_check_mempool(mp, &start, &end) != 0) {
-		rte_errno = EINVAL;
-		ERROR("mempool %p: not virtually contiguous",
-			(void *)mp);
-		return NULL;
-	}
-	DEBUG("mempool %p area start=%p end=%p size=%zu",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	/* Round start and end to page boundary if found in memory segments. */
-	ms = rte_mem_virt2memseg((void *)start, NULL);
-	if (ms != NULL)
-		start = RTE_ALIGN_FLOOR(start, ms->hugepage_sz);
-	end = RTE_ALIGN_CEIL(end, ms->hugepage_sz);
-	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	rte_spinlock_lock(&priv->mr_lock);
-	LIST_FOREACH(mr, &priv->mr, next)
-		if (mp == mr->mp && start >= mr->start && end <= mr->end)
-			break;
-	if (mr) {
-		++mr->refcnt;
-		goto release;
-	}
-	mr = rte_malloc(__func__, sizeof(*mr), 0);
-	if (!mr) {
-		rte_errno = ENOMEM;
-		goto release;
-	}
-	*mr = (struct mlx4_mr){
-		.start = start,
-		.end = end,
-		.refcnt = 1,
-		.priv = priv,
-		.mr = mlx4_glue->reg_mr(priv->pd, (void *)start, end - start,
-					IBV_ACCESS_LOCAL_WRITE),
-		.mp = mp,
-	};
-	if (mr->mr) {
-		mr->lkey = mr->mr->lkey;
-		LIST_INSERT_HEAD(&priv->mr, mr, next);
-	} else {
-		rte_free(mr);
-		mr = NULL;
-		rte_errno = errno ? errno : EINVAL;
-	}
-release:
-	rte_spinlock_unlock(&priv->mr_lock);
-	return mr;
-}
-
-/**
- * Release a memory region.
- *
- * This function decrements its reference count and destroys it after
- * reaching 0.
- *
- * Note to avoid race conditions given this function may be used from the
- * data plane, it's extremely important that each user holds its own
- * reference.
- *
- * @param mr
- *   Memory region to release.
- */
-void
-mlx4_mr_put(struct mlx4_mr *mr)
-{
-	struct priv *priv = mr->priv;
-
-	rte_spinlock_lock(&priv->mr_lock);
-	assert(mr->refcnt);
-	if (--mr->refcnt)
-		goto release;
-	LIST_REMOVE(mr, next);
-	claim_zero(mlx4_glue->dereg_mr(mr->mr));
-	rte_free(mr);
-release:
-	rte_spinlock_unlock(&priv->mr_lock);
-}
-
-/**
- * Add memory region (MR) <-> memory pool (MP) association to txq->mp2mr[].
- * If mp2mr[] is full, remove an entry first.
- *
- * @param txq
- *   Pointer to Tx queue structure.
- * @param[in] mp
- *   Memory pool for which a memory region lkey must be added.
- * @param[in] i
- *   Index in memory pool (MP) where to add memory region (MR).
- *
- * @return
- *   Added mr->lkey on success, (uint32_t)-1 on failure.
- */
-uint32_t
-mlx4_txq_add_mr(struct txq *txq, struct rte_mempool *mp, uint32_t i)
-{
-	struct mlx4_mr *mr;
-
-	/* Add a new entry, register MR first. */
-	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
-	      (void *)txq, mp->name, (void *)mp);
-	mr = mlx4_mr_get(txq->priv, mp);
-	if (unlikely(mr == NULL)) {
-		DEBUG("%p: unable to configure MR, mlx4_mr_get() failed",
-		      (void *)txq);
-		return (uint32_t)-1;
-	}
-	if (unlikely(i == RTE_DIM(txq->mp2mr))) {
-		/* Table is full, remove oldest entry. */
-		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
-		      (void *)txq);
-		--i;
-		mlx4_mr_put(txq->mp2mr[0].mr);
-		memmove(&txq->mp2mr[0], &txq->mp2mr[1],
-			(sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
-	}
-	/* Store the new entry. */
-	txq->mp2mr[i].mp = mp;
-	txq->mp2mr[i].mr = mr;
-	txq->mp2mr[i].lkey = mr->lkey;
-	DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
-	      (void *)txq, mp->name, (void *)mp, txq->mp2mr[i].lkey);
-	return txq->mp2mr[i].lkey;
-}
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 65f099423..5621d5bd4 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -583,7 +583,7 @@ mlx4_rxq_attach(struct rxq *rxq)
 			.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
 								  uintptr_t)),
 			.byte_count = rte_cpu_to_be_32(buf->data_len),
-			.lkey = rte_cpu_to_be_32(rxq->mr->lkey),
+			.lkey = UINT32_MAX,
 		};
 		(*elts)[i] = buf;
 	}
@@ -883,13 +883,6 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		      1 << rxq->sges_n);
 		goto error;
 	}
-	/* Use the entire Rx mempool as the memory region. */
-	rxq->mr = mlx4_mr_get(priv, mp);
-	if (!rxq->mr) {
-		ERROR("%p: MR creation failure: %s",
-		      (void *)dev, strerror(rte_errno));
-		goto error;
-	}
 	if (dev->data->dev_conf.intr_conf.rxq) {
 		rxq->channel = mlx4_glue->create_comp_channel(priv->ctx);
 		if (rxq->channel == NULL) {
@@ -947,7 +940,5 @@ mlx4_rx_queue_release(void *dpdk_rxq)
 	assert(!rxq->rq_db);
 	if (rxq->channel)
 		claim_zero(mlx4_glue->destroy_comp_channel(rxq->channel));
-	if (rxq->mr)
-		mlx4_mr_put(rxq->mr);
 	rte_free(rxq);
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 2dfee957f..2f9d3798b 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -39,7 +39,6 @@ struct mlx4_rxq_stats {
 struct rxq {
 	struct priv *priv; /**< Back pointer to private data. */
 	struct rte_mempool *mp; /**< Memory pool for allocations. */
-	struct mlx4_mr *mr; /**< Memory region. */
 	struct ibv_cq *cq; /**< Completion queue. */
 	struct ibv_wq *wq; /**< Work queue. */
 	struct ibv_comp_channel *channel; /**< Rx completion channel. */
@@ -109,11 +108,6 @@ struct txq {
 	uint32_t lb:1; /**< Whether packets should be looped back by eSwitch. */
 	uint8_t *bounce_buf;
 	/**< Memory used for storing the first DWORD of data TXBBs. */
-	struct {
-		const struct rte_mempool *mp; /**< Cached memory pool. */
-		struct mlx4_mr *mr; /**< Memory region (for mp). */
-		uint32_t lkey; /**< mr->lkey copy. */
-	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /**< MP to MR translation table. */
 	struct priv *priv; /**< Back pointer to private data. */
 	unsigned int socket; /**< CPU socket ID for allocations. */
 	struct ibv_cq *cq; /**< Completion queue. */
@@ -161,34 +155,12 @@ int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			const struct rte_eth_txconf *conf);
 void mlx4_tx_queue_release(void *dpdk_txq);
 
-/**
- * Get memory region (MR) <-> memory pool (MP) association from txq->mp2mr[].
- * Call mlx4_txq_add_mr() if MP is not registered yet.
- *
- * @param txq
- *   Pointer to Tx queue structure.
- * @param[in] mp
- *   Memory pool for which a memory region lkey must be returned.
- *
- * @return
- *   mr->lkey on success, (uint32_t)-1 on failure.
- */
 static inline uint32_t
 mlx4_txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
 {
-	unsigned int i;
-
-	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-		if (unlikely(txq->mp2mr[i].mp == NULL)) {
-			/* Unknown MP, add a new MR for it. */
-			break;
-		}
-		if (txq->mp2mr[i].mp == mp) {
-			/* MP found MP. */
-			return txq->mp2mr[i].lkey;
-		}
-	}
-	return mlx4_txq_add_mr(txq, mp, i);
+	(void)txq;
+	(void)mp;
+	return UINT32_MAX;
 }
 
 #endif /* MLX4_RXTX_H_ */
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index fe6a8e07e..5ea09b0b0 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -63,64 +63,6 @@ mlx4_txq_free_elts(struct txq *txq)
 	txq->elts_tail = txq->elts_head;
 }
 
-struct txq_mp2mr_mbuf_check_data {
-	int ret;
-};
-
-/**
- * Callback function for rte_mempool_obj_iter() to check whether a given
- * mempool object looks like a mbuf.
- *
- * @param[in] mp
- *   The mempool pointer
- * @param[in] arg
- *   Context data (struct mlx4_txq_mp2mr_mbuf_check_data). Contains the
- *   return value.
- * @param[in] obj
- *   Object address.
- * @param index
- *   Object index, unused.
- */
-static void
-mlx4_txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
-			  uint32_t index)
-{
-	struct txq_mp2mr_mbuf_check_data *data = arg;
-	struct rte_mbuf *buf = obj;
-
-	(void)index;
-	/*
-	 * Check whether mbuf structure fits element size and whether mempool
-	 * pointer is valid.
-	 */
-	if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
-		data->ret = -1;
-}
-
-/**
- * Iterator function for rte_mempool_walk() to register existing mempools and
- * fill the MP to MR cache of a Tx queue.
- *
- * @param[in] mp
- *   Memory Pool to register.
- * @param *arg
- *   Pointer to Tx queue structure.
- */
-static void
-mlx4_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
-{
-	struct txq *txq = arg;
-	struct txq_mp2mr_mbuf_check_data data = {
-		.ret = 0,
-	};
-
-	/* Register mempool only if the first element looks like a mbuf. */
-	if (rte_mempool_obj_iter(mp, mlx4_txq_mp2mr_mbuf_check, &data) == 0 ||
-			data.ret == -1)
-		return;
-	mlx4_txq_mp2mr(txq, mp);
-}
-
 /**
  * Retrieves information needed in order to directly access the Tx queue.
  *
@@ -404,8 +346,6 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
 		(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
-	/* Pre-register known mempools. */
-	rte_mempool_walk(mlx4_txq_mp2mr_iter, txq);
 	DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq);
 	dev->data->tx_queues[idx] = txq;
 	return 0;
@@ -446,11 +386,5 @@ mlx4_tx_queue_release(void *dpdk_txq)
 		claim_zero(mlx4_glue->destroy_qp(txq->qp));
 	if (txq->cq)
 		claim_zero(mlx4_glue->destroy_cq(txq->cq));
-	for (i = 0; i != RTE_DIM(txq->mp2mr); ++i) {
-		if (!txq->mp2mr[i].mp)
-			break;
-		assert(txq->mp2mr[i].mr);
-		mlx4_mr_put(txq->mp2mr[i].mr);
-	}
 	rte_free(txq);
 }
-- 
2.11.0



More information about the dev mailing list