[dpdk-dev] [PATCH v5] mempool: reduce rte_mempool structure size

Wiles, Keith keith.wiles at intel.com
Thu Apr 14 15:28:48 CEST 2016


>From: Keith Wiles <keith.wiles at intel.com>
>
>The rte_mempool structure is changed, which will cause an ABI change
>for this structure. Providing backward compat is not reasonable
>here as this structure is used in multiple defines/inlines.
>
>Allow mempool cache support to be dynamic, depending on whether the
>mempool being created needs cache support. Saves about 1.5M of
>memory used by the rte_mempool structure.
>
>Allocating small mempools which do not require cache can consume
>large amounts of memory if you have a number of these mempools.
>
>Change to be effective in release 16.07.
>
>Signed-off-by: Keith Wiles <keith.wiles at intel.com>
>Acked-by: Olivier Matz <olivier.matz at 6wind.com>
>---
>
>Changes in v5:
>
>- use RTE_PTR_ADD() instead of cast to (char *) to fix compilation on tilera.
>  Error log was:
>
>  rte_mempool.c: In function ‘rte_mempool_xmem_create’:
>  rte_mempool.c:595: error: cast increases required alignment of target type
>
>
> app/test/test_mempool.c          |  4 +--
> lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
> lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
> 3 files changed, 40 insertions(+), 48 deletions(-)
>
>diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
>index f0f823b..10e1fa4 100644
>--- a/app/test/test_mempool.c
>+++ b/app/test/test_mempool.c
>@@ -122,8 +122,8 @@ test_mempool_basic(void)
> 		return -1;
> 
> 	printf("get private data\n");
>-	if (rte_mempool_get_priv(mp) !=
>-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>+	if (rte_mempool_get_priv(mp) != (char *)mp +
>+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))

Should we not add the RTE_PTR_ADD() here as well?

> 		return -1;
> 
> 	printf("get physical address of an object\n");
>diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>index f8781e1..7a0e07e 100644
>--- a/lib/librte_mempool/rte_mempool.c
>+++ b/lib/librte_mempool/rte_mempool.c
>@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	/* compilation-time checks */
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
>-			  RTE_CACHE_LINE_MASK) != 0);
>-#endif
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 		 */
> 		int head = sizeof(struct rte_mempool);
> 		int new_size = (private_data_size + head) % page_size;
>-		if (new_size) {
>+		if (new_size)
> 			private_data_size += page_size - new_size;
>-		}
> 	}
> 
> 	/* try to allocate tailq entry */
>@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	 * store mempool objects. Otherwise reserve a memzone that is large
> 	 * enough to hold mempool header and metadata plus mempool objects.
> 	 */
>-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
>+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
>+	mempool_size += private_data_size;
> 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
> 	if (vaddr == NULL)
> 		mempool_size += (size_t)objsz.total_size * n;
>@@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
> 	mp->private_data_size = private_data_size;
> 
>+	/*
>+	 * local_cache pointer is set even if cache_size is zero.
>+	 * The local_cache points to just past the elt_pa[] array.
>+	 */
>+	mp->local_cache = (struct rte_mempool_cache *)
>+		RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
>+
> 	/* calculate address of the first element for continuous mempool. */
>-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
>+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
> 		private_data_size;
> 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
> 
>@@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 		mp->elt_va_start = (uintptr_t)obj;
> 		mp->elt_pa[0] = mp->phys_addr +
> 			(mp->elt_va_start - (uintptr_t)mp);
>-
>-	/* mempool elements in a separate chunk of memory. */
> 	} else {
>+		/* mempool elements in a separate chunk of memory. */
> 		mp->elt_va_start = (uintptr_t)vaddr;
> 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
> 	}
>@@ -643,19 +645,15 @@ unsigned
> rte_mempool_count(const struct rte_mempool *mp)
> {
> 	unsigned count;
>+	unsigned lcore_id;
> 
> 	count = rte_ring_count(mp->ring);
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>-	{
>-		unsigned lcore_id;
>-		if (mp->cache_size == 0)
>-			return count;
>+	if (mp->cache_size == 0)
>+		return count;
> 
>-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>-			count += mp->local_cache[lcore_id].len;
>-	}
>-#endif
>+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>+		count += mp->local_cache[lcore_id].len;
> 
> 	/*
> 	 * due to race condition (access to len is not locked), the
>@@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp)
> static unsigned
> rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	unsigned lcore_id;
> 	unsigned count = 0;
> 	unsigned cache_count;
> 
> 	fprintf(f, "  cache infos:\n");
> 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>+
>+	if (mp->cache_size == 0)
>+		return count;
>+
> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> 		cache_count = mp->local_cache[lcore_id].len;
> 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>@@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> 	}
> 	fprintf(f, "    total_cache_count=%u\n", count);
> 	return count;
>-#else
>-	RTE_SET_USED(mp);
>-	fprintf(f, "  cache disabled\n");
>-	return 0;
>-#endif
> }
> 
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
> #define mempool_audit_cookies(mp) do {} while(0)
> #endif
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /* check cookies before and after objects */
> static void
> mempool_audit_cache(const struct rte_mempool *mp)
> {
> 	/* check cache size consistency */
> 	unsigned lcore_id;
>+
>+	if (mp->cache_size == 0)
>+		return;
>+
> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
> 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>@@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
> 		}
> 	}
> }
>-#else
>-#define mempool_audit_cache(mp) do {} while(0)
>-#endif
>-
> 
> /* check the consistency of mempool (size, cookies, ...) */
> void
>diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>index 9745bf0..8595e77 100644
>--- a/lib/librte_mempool/rte_mempool.h
>+++ b/lib/librte_mempool/rte_mempool.h
>@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
> } __rte_cache_aligned;
> #endif
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /**
>  * A structure that stores a per-core object cache.
>  */
>@@ -107,7 +106,6 @@ struct rte_mempool_cache {
> 	 */
> 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
> } __rte_cache_aligned;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> /**
>  * A structure that stores the size of mempool elements.
>@@ -194,10 +192,7 @@ struct rte_mempool {
> 
> 	unsigned private_data_size;      /**< Size of private data. */
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>-	/** Per-lcore local cache. */
>-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
>-#endif
>+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
> 
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> 	/** Per-lcore statistics. */
>@@ -247,6 +242,13 @@ struct rte_mempool {
> #endif
> 
> /**
>+ * Size of the elt_pa array based on the number of pages. (Internal use)
>+ */
>+#define __PA_SIZE(mp, pgn) \
>+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
>+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
>+
>+/**
>  * Calculate the size of the mempool header.
>  *
>  * @param mp
>@@ -254,9 +256,9 @@ struct rte_mempool {
>  * @param pgn
>  *   Number of pages used to store mempool objects.
>  */
>-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
>-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
>-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
>+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
>+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
>+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
> 
> /**
>  * Return true if the whole mempool is in contiguous memory.
>@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
> __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> 		    unsigned n, int is_mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	struct rte_mempool_cache *cache;
> 	uint32_t index;
> 	void **cache_objs;
> 	unsigned lcore_id = rte_lcore_id();
> 	uint32_t cache_size = mp->cache_size;
> 	uint32_t flushthresh = mp->cache_flushthresh;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* increment stat now, adding in mempool always success */
> 	__MEMPOOL_STAT_ADD(mp, put, n);
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	/* cache is not enabled or single producer or non-EAL thread */
> 	if (unlikely(cache_size == 0 || is_mp == 0 ||
> 		     lcore_id >= RTE_MAX_LCORE))
>@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> 	return;
> 
> ring_enqueue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* push remaining objects in ring */
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> 		   unsigned n, int is_mc)
> {
> 	int ret;
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	struct rte_mempool_cache *cache;
> 	uint32_t index, len;
> 	void **cache_objs;
>@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> 	return 0;
> 
> ring_dequeue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* get remaining objects from ring */
> 	if (is_mc)
>@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
>  */
> static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> {
>-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>+	return (char *)mp +
>+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);

And should we not use RTE_PTR_ADD() here as well?

> }
> 
> /**
>-- 
>2.1.4
>
>


Regards,
Keith






More information about the dev mailing list