[dpdk-dev] [RFC 21/35] mempool: default allocation in several memory chunks

Olivier Matz olivier.matz at 6wind.com
Wed Mar 9 17:19:27 CET 2016


Introduce rte_mempool_populate_default(), which allocates the
mempool objects in several memzones.

The mempool header is now always allocated in its own memzone
(never with its objects). Thanks to this modification, we can remove
much of the specific behavior that was required in
rte_mempool_xmem_create() when hugepages are not enabled.

This change requires updating how the kni and mellanox drivers look up
mbuf memory. The lookup only works if there is a single memory chunk
(as is the case today), but we could use rte_mempool_mem_iter() to
support more memory chunks, as sketched below.
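
As an illustration, here is a minimal sketch (not part of this patch)
of how a pmd could register one MR per chunk through
rte_mempool_mem_iter(). The mr_iter_ctx structure and
register_chunk_mr() are hypothetical names, and the callback signature
is assumed to be the one introduced earlier in this series:

	#include <infiniband/verbs.h>
	#include <rte_mempool.h>

	struct mr_iter_ctx {
		struct ibv_pd *pd;	/* protection domain to register into */
		int ret;		/* first error seen, 0 on success */
	};

	/* rte_mempool_mem_cb_t: invoked once per memory chunk */
	static void
	register_chunk_mr(struct rte_mempool *mp __rte_unused, void *opaque,
			  struct rte_mempool_memhdr *memhdr,
			  unsigned mem_idx __rte_unused)
	{
		struct mr_iter_ctx *ctx = opaque;
		struct ibv_mr *mr;

		if (ctx->ret != 0)
			return;
		mr = ibv_reg_mr(ctx->pd, memhdr->addr, memhdr->len,
				IBV_ACCESS_LOCAL_WRITE |
				IBV_ACCESS_REMOTE_WRITE);
		if (mr == NULL)
			ctx->ret = -1;
		/* a real pmd would keep mr for address lookup and cleanup */
	}

txq_mp2mr() could then call rte_mempool_mem_iter(mp, register_chunk_mr,
&ctx) instead of rejecting pools with nb_mem_chunks != 1.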

We can also remove RTE_MEMPOOL_OBJ_NAME, which is no longer required
for the lookup, as the memory chunks are now referenced by the mempool
itself (see the lookup sketch below).
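
For reference, the new lookup path used by kni reduces to the
following sketch (find_pool_first_chunk() is a hypothetical helper
written for this example):

	#include <stdio.h>
	#include <rte_mempool.h>
	#include <rte_memzone.h>

	static int
	find_pool_first_chunk(const char *name, void **va, phys_addr_t *pa)
	{
		char mz_name[RTE_MEMZONE_NAMESIZE];
		const struct rte_memzone *mz;
		const struct rte_mempool *mp;

		snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name);
		mz = rte_memzone_lookup(mz_name);	/* e.g. "MP_mbuf_pool" */
		if (mz == NULL)
			return -1;
		mp = mz->addr;	/* header sits at the start of its own memzone */
		if (mp->nb_mem_chunks != 1)
			return -1;	/* caller only handles a single chunk */
		*va = STAILQ_FIRST(&mp->mem_list)->addr;
		*pa = STAILQ_FIRST(&mp->mem_list)->phys_addr;
		return 0;
	}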

Note that rte_mempool_create() is still broken (as it was before) when
there is no hugepage support (rte_mempool_xmem_create() has to be used
instead). This is fixed in the next commit.
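
For completeness, a hedged usage example of the new default population
(assumes hugepage support per the note above; the pool name and
parameters are arbitrary):

	#include <stdio.h>
	#include <rte_mempool.h>

	struct rte_mempool *mp;

	mp = rte_mempool_create("test_pool", 8192, 2048,
		256,		/* per-lcore cache size */
		0,		/* private data size */
		NULL, NULL,	/* no pool init callback */
		NULL, NULL,	/* no per-object init callback */
		SOCKET_ID_ANY, 0);
	if (mp != NULL)
		printf("pool spread over %u memory chunk(s)\n",
			mp->nb_mem_chunks);

With this patch, the object memory behind such a pool may come from
several memzones named "MP_test_pool_0", "MP_test_pool_1", and so on,
rather than from one big contiguous zone.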

Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
---
 drivers/net/mlx4/mlx4.c               |  18 ++++--
 drivers/net/mlx5/mlx5_rxq.c           |   9 ++-
 drivers/net/mlx5/mlx5_rxtx.c          |   9 ++-
 lib/librte_kni/rte_kni.c              |  12 +++-
 lib/librte_mempool/rte_dom0_mempool.c |   2 +-
 lib/librte_mempool/rte_mempool.c      | 116 +++++++++++++++++++---------------
 lib/librte_mempool/rte_mempool.h      |  11 ----
 7 files changed, 102 insertions(+), 75 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index d9b2291..405324c 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -1237,9 +1237,14 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp)
 	/* Add a new entry, register MR first. */
 	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
 	      (void *)txq, mp->name, (const void *)mp);
+	if (mp->nb_mem_chunks != 1) {
+		DEBUG("%p: only 1 memory chunk is supported in mempool",
+			(void *)txq);
+		return (uint32_t)-1;
+	}
 	mr = ibv_reg_mr(txq->priv->pd,
-			(void *)mp->elt_va_start,
-			(mp->elt_va_end - mp->elt_va_start),
+			(void *)STAILQ_FIRST(&mp->mem_list)->addr,
+			STAILQ_FIRST(&mp->mem_list)->len,
 			(IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
 	if (unlikely(mr == NULL)) {
 		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
@@ -3675,6 +3680,11 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 		      " multiple of %d)", (void *)dev, MLX4_PMD_SGE_WR_N);
 		return EINVAL;
 	}
+	if (mp->nb_mem_chunks != 1) {
+		ERROR("%p: only 1 memory chunk is supported in mempool",
+			(void *)dev);
+		return EINVAL;
+	}
 	/* Get mbuf length. */
 	buf = rte_pktmbuf_alloc(mp);
 	if (buf == NULL) {
@@ -3702,8 +3712,8 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
 	/* Use the entire RX mempool as the memory region. */
 	tmpl.mr = ibv_reg_mr(priv->pd,
-			     (void *)mp->elt_va_start,
-			     (mp->elt_va_end - mp->elt_va_start),
+			     (void *)STAILQ_FIRST(&mp->mem_list)->addr,
+			     STAILQ_FIRST(&mp->mem_list)->len,
 			     (IBV_ACCESS_LOCAL_WRITE |
 			      IBV_ACCESS_REMOTE_WRITE));
 	if (tmpl.mr == NULL) {
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index ebbe186..1513b37 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1113,6 +1113,11 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 		      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
 		return EINVAL;
 	}
+	if (mp->nb_mem_chunks != 1) {
+		ERROR("%p: only 1 memory chunk is supported in mempool",
+			(void *)dev);
+		return EINVAL;
+	}
 	/* Get mbuf length. */
 	buf = rte_pktmbuf_alloc(mp);
 	if (buf == NULL) {
@@ -1140,8 +1145,8 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
 	/* Use the entire RX mempool as the memory region. */
 	tmpl.mr = ibv_reg_mr(priv->pd,
-			     (void *)mp->elt_va_start,
-			     (mp->elt_va_end - mp->elt_va_start),
+			     (void *)STAILQ_FIRST(&mp->mem_list)->addr,
+			     STAILQ_FIRST(&mp->mem_list)->len,
 			     (IBV_ACCESS_LOCAL_WRITE |
 			      IBV_ACCESS_REMOTE_WRITE));
 	if (tmpl.mr == NULL) {
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index f002ca2..4ff88fc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -165,9 +165,14 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp)
 	/* Add a new entry, register MR first. */
 	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
 	      (void *)txq, mp->name, (const void *)mp);
+	if (mp->nb_mem_chunks != 1) {
+		DEBUG("%p: only 1 memory chunk is supported in mempool",
+			(void *)txq);
+		return (uint32_t)-1;
+	}
 	mr = ibv_reg_mr(txq->priv->pd,
-			(void *)mp->elt_va_start,
-			(mp->elt_va_end - mp->elt_va_start),
+			(void *)STAILQ_FIRST(&mp->mem_list)->addr,
+			STAILQ_FIRST(&mp->mem_list)->len,
 			(IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
 	if (unlikely(mr == NULL)) {
 		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index ea9baf4..3028fd4 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -323,6 +323,7 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 	char intf_name[RTE_KNI_NAMESIZE];
 	char mz_name[RTE_MEMZONE_NAMESIZE];
 	const struct rte_memzone *mz;
+	const struct rte_mempool *mp;
 	struct rte_kni_memzone_slot *slot = NULL;
 
 	if (!pktmbuf_pool || !conf || !conf->name[0])
@@ -415,12 +416,17 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 
 
 	/* MBUF mempool */
-	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME,
+	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT,
 		pktmbuf_pool->name);
 	mz = rte_memzone_lookup(mz_name);
 	KNI_MEM_CHECK(mz == NULL);
-	dev_info.mbuf_va = mz->addr;
-	dev_info.mbuf_phys = mz->phys_addr;
+	mp = (struct rte_mempool *)mz->addr;
+	/* KNI currently requires the mempool to have a single memory chunk */
+	if (mp->nb_mem_chunks != 1)
+		goto kni_fail;
+
+	dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr;
+	dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr;
 	ctx->pktmbuf_pool = pktmbuf_pool;
 	ctx->group_id = conf->group_id;
 	ctx->slot_id = slot->id;
diff --git a/lib/librte_mempool/rte_dom0_mempool.c b/lib/librte_mempool/rte_dom0_mempool.c
index 0051bd5..dad755c 100644
--- a/lib/librte_mempool/rte_dom0_mempool.c
+++ b/lib/librte_mempool/rte_dom0_mempool.c
@@ -110,7 +110,7 @@ rte_dom0_mempool_create(const char *name, unsigned elt_num, unsigned elt_size,
 	if (pa == NULL)
 		return mp;
 
-	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME, name);
+	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT "_elt", name);
 	mz = rte_memzone_reserve(mz_name, sz, socket_id, mz_flags);
 	if (mz == NULL) {
 		free(pa);
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 805ac19..7fd2bb4 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -391,7 +391,7 @@ rte_mempool_ring_create(struct rte_mempool *mp)
 }
 
 /* free a memchunk allocated with rte_memzone_reserve() */
-__rte_unused static void
+static void
 rte_mempool_memchunk_mz_free(__rte_unused struct rte_mempool_memhdr *memhdr,
 	void *opaque)
 {
@@ -509,6 +509,59 @@ rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
 	return cnt;
 }
 
+/* Default function to populate the mempool: allocate memory in memzones,
+ * and populate them. Return the number of objects added, or a negative
+ * value on error. */
+static int rte_mempool_populate_default(struct rte_mempool *mp)
+{
+	int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	const struct rte_memzone *mz;
+	size_t size, total_elt_sz, align;
+	unsigned mz_id, n;
+	int ret;
+
+	/* mempool must not be populated */
+	if (mp->nb_mem_chunks != 0)
+		return -EEXIST;
+
+	align = RTE_CACHE_LINE_SIZE;
+	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+	for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
+		size = rte_mempool_xmem_size(n, total_elt_sz, 0);
+
+		ret = snprintf(mz_name, sizeof(mz_name),
+			RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
+		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
+			ret = -ENAMETOOLONG;
+			goto fail;
+		}
+
+		mz = rte_memzone_reserve_aligned(mz_name, size,
+			mp->socket_id, mz_flags, align);
+		/* not enough memory, retry with the biggest zone we have */
+		if (mz == NULL)
+			mz = rte_memzone_reserve_aligned(mz_name, 0,
+				mp->socket_id, mz_flags, align);
+		if (mz == NULL) {
+			ret = -rte_errno;
+			goto fail;
+		}
+
+		ret = rte_mempool_populate_phys(mp, mz->addr, mz->phys_addr,
+			mz->len, rte_mempool_memchunk_mz_free,
+			RTE_DECONST(void *, mz));
+		if (ret < 0)
+			goto fail;
+	}
+
+	return mp->size;
+
+ fail:
+	rte_mempool_free_memchunks(mp);
+	return ret;
+}
+
 /*
  * Create the mempool over already allocated chunk of memory.
 * That external memory buffer can consist of physically disjoint pages.
@@ -531,10 +584,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	const struct rte_memzone *mz;
 	size_t mempool_size;
 	int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
-	void *obj;
 	struct rte_mempool_objsz objsz;
-	void *startaddr;
-	int page_size = getpagesize();
 	int ret;
 
 	/* compilation-time checks */
@@ -589,16 +639,6 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	private_data_size = (private_data_size +
 			     RTE_MEMPOOL_ALIGN_MASK) & (~RTE_MEMPOOL_ALIGN_MASK);
 
-	if (! rte_eal_has_hugepages()) {
-		/*
-		 * expand private data size to a whole page, so that the
-		 * first pool element will start on a new standard page
-		 */
-		int head = sizeof(struct rte_mempool);
-		int new_size = (private_data_size + head) % page_size;
-		if (new_size)
-			private_data_size += page_size - new_size;
-	}
 
 	/* try to allocate tailq entry */
 	te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
@@ -615,17 +655,6 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mempool_size = MEMPOOL_HEADER_SIZE(mp, cache_size);
 	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
-	if (vaddr == NULL)
-		mempool_size += (size_t)objsz.total_size * n;
-
-	if (! rte_eal_has_hugepages()) {
-		/*
-		 * we want the memory pool to start on a page boundary,
-		 * because pool elements crossing page boundaries would
-		 * result in discontiguous physical addresses
-		 */
-		mempool_size += page_size;
-	}
 
 	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name);
 
@@ -633,20 +662,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	if (mz == NULL)
 		goto exit_unlock;
 
-	if (rte_eal_has_hugepages()) {
-		startaddr = (void*)mz->addr;
-	} else {
-		/* align memory pool start address on a page boundary */
-		unsigned long addr = (unsigned long)mz->addr;
-		if (addr & (page_size - 1)) {
-			addr += page_size;
-			addr &= ~(page_size - 1);
-		}
-		startaddr = (void*)addr;
-	}
-
 	/* init the mempool structure */
-	mp = startaddr;
 	memset(mp, 0, sizeof(*mp));
 	snprintf(mp->name, sizeof(mp->name), "%s", name);
 	mp->phys_addr = mz->phys_addr;
@@ -677,22 +693,17 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp_init(mp, mp_init_arg);
 
 	/* mempool elements allocated together with mempool */
-	if (vaddr == NULL) {
-		/* calculate address of the first element for continuous mempool. */
-		obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, cache_size) +
-			private_data_size;
-		obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
-
-		ret = rte_mempool_populate_phys(mp, obj,
-			mp->phys_addr + ((char *)obj - (char *)mp),
-			objsz.total_size * n, NULL, NULL);
-		if (ret != (int)mp->size)
-			goto exit_unlock;
-	} else {
+	if (vaddr == NULL)
+		ret = rte_mempool_populate_default(mp);
+	else
 		ret = rte_mempool_populate_phys_tab(mp, vaddr,
 			paddr, pg_num, pg_shift, NULL, NULL);
-		if (ret != (int)mp->size)
-			goto exit_unlock;
+	if (ret < 0) {
+		rte_errno = -ret;
+		goto exit_unlock;
+	} else if (ret != (int)mp->size) {
+		rte_errno = EINVAL;
+		goto exit_unlock;
 	}
 
 	/* call the initializer */
@@ -715,6 +726,7 @@ exit_unlock:
 		rte_ring_free(mp->ring);
 	}
 	rte_free(te);
+	rte_memzone_free(mz);
 
 	return NULL;
 }
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 2cce7ee..2770d80 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -124,17 +124,6 @@ struct rte_mempool_objsz {
 /* "MP_<name>" */
 #define	RTE_MEMPOOL_MZ_FORMAT	RTE_MEMPOOL_MZ_PREFIX "%s"
 
-#ifdef RTE_LIBRTE_XEN_DOM0
-
-/* "<name>_MP_elt" */
-#define	RTE_MEMPOOL_OBJ_NAME	"%s_" RTE_MEMPOOL_MZ_PREFIX "elt"
-
-#else
-
-#define	RTE_MEMPOOL_OBJ_NAME	RTE_MEMPOOL_MZ_FORMAT
-
-#endif /* RTE_LIBRTE_XEN_DOM0 */
-
 #define	MEMPOOL_PG_SHIFT_MAX	(sizeof(uintptr_t) * CHAR_BIT - 1)
 
 /** Mempool over one chunk of physically continuous memory */
-- 
2.1.4


