|SUCCESS| [v8] mempool: improve cache behaviour and performance

dpdklab at iol.unh.edu dpdklab at iol.unh.edu
Thu Jun 4 15:48:18 CEST 2026


Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/164964

_AI Code Review_

=== Patch Review: series.patch (via Claude) ===
Review date: 2026-06-04

I'll review this DPDK mempool cache optimization patch against the DPDK coding standards and contribution guidelines.

git sha: 20260604114851.12586-1-mb at smartsharesystems.com
Author: Morten Brorup <mb at smartsharesystems.com>
Subject: [PATCH v8] mempool: improve cache behaviour and performance

This patch refactors the mempool cache to eliminate unexpected behavior
and reduce the mempool cache miss rate by changing the cache
flush/refill algorithm.

> diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
> index 17f90a6352..1b6cc181fb 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -158,3 +158,10 @@ Deprecation Notices
>  * net/iavf: The dynamic mbuf field used to detect LLDP packets on the
>    transmit path in the iavf PMD will be removed in a future release.
>    After removal, only packet type-based detection will be supported.
> +
> +* mempool: The ``flushthresh`` field in ``struct rte_mempool_cache``
> +  is obsolete, and will be removed in DPDK 26.11.
> +
> +* mempool: The object array in ``struct rte_mempool_cache`` is oversize by
> +  factor two, and will be reduced to ``RTE_MEMPOOL_CACHE_MAX_SIZE`` in
> +  DPDK 26.11.

[ ... ]

> diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c
> index 3ddf7b9c11..817e2b8dc1 100644
> --- a/lib/mempool/rte_mempool.c
> +++ b/lib/mempool/rte_mempool.c
> @@ -52,11 +52,6 @@ static void
>  mempool_event_callback_invoke(enum rte_mempool_event event,
>  			      struct rte_mempool *mp);
>  
> -/* Note: avoid using floating point since that compiler
> - * may not think that is constant.
> - */
> -#define CALC_CACHE_FLUSHTHRESH(c) (((c) * 3) / 2)

[ ... ]

> @@ -757,13 +752,8 @@ rte_mempool_free(struct rte_mempool *mp)
>  static void
>  mempool_cache_init(struct rte_mempool_cache *cache, uint32_t size)
>  {
> -	/* Check that cache have enough space for flush threshold */
> -	RTE_BUILD_BUG_ON(CALC_CACHE_FLUSHTHRESH(RTE_MEMPOOL_CACHE_MAX_SIZE) >
> -			 RTE_SIZEOF_FIELD(struct rte_mempool_cache, objs) /
> -			 RTE_SIZEOF_FIELD(struct rte_mempool_cache, objs[0]));
> -
>  	cache->size = size;
> -	cache->flushthresh = CALC_CACHE_FLUSHTHRESH(size);
> +	cache->flushthresh = size; /* Obsolete; for API/ABI compatibility purposes only */
>  	cache->len = 0;
>  }

[ ... ]

> @@ -850,7 +840,7 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
>  
>  	/* asked cache too big */
>  	if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE ||
> -	    CALC_CACHE_FLUSHTHRESH(cache_size) > n) {
> +	    cache_size > n) {
>  		rte_errno = EINVAL;
>  		return NULL;
>  	}

[ ... ]

> diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
> index d3f969847b..50d958c7c6 100644
> --- a/lib/mempool/rte_mempool.h
> +++ b/lib/mempool/rte_mempool.h

[ ... ]

> @@ -1419,22 +1426,30 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,
>  	RTE_MEMPOOL_CACHE_STAT_ADD(cache, put_bulk, 1);
>  	RTE_MEMPOOL_CACHE_STAT_ADD(cache, put_objs, n);
>  
> -	__rte_assume(cache->flushthresh <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2);
> -	__rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2);
> -	__rte_assume(cache->len <= cache->flushthresh);
> -	if (likely(cache->len + n <= cache->flushthresh)) {
> +	__rte_assume(cache->size <= RTE_MEMPOOL_CACHE_MAX_SIZE);
> +	__rte_assume(cache->size / 2 <= RTE_MEMPOOL_CACHE_MAX_SIZE / 2);
> +	__rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE);
> +	__rte_assume(cache->len <= cache->size);
> +	if (likely(cache->len + n <= cache->size)) {
>  		/* Sufficient room in the cache for the objects. */
>  		cache_objs = &cache->objs[cache->len];
>  		cache->len += n;
> -	} else if (n <= cache->flushthresh) {
> +	} else if (n <= cache->size / 2) {
>  		/*
> -		 * The cache is big enough for the objects, but - as detected by
> -		 * the comparison above - has insufficient room for them.
> -		 * Flush the cache to make room for the objects.
> +		 * The number of objects is within the cache bounce buffer limit,
> +		 * but - as detected by the comparison above - the cache has
> +		 * insufficient room for them.
> +		 * Flush the cache to the backend to make room for the objects;
> +		 * flush (size / 2) objects from the bottom of the cache, where
> +		 * objects are less hot, and move down the remaining objects, which
> +		 * are more hot, from the upper half of the cache.
>  		 */
> -		cache_objs = &cache->objs[0];
> -		rte_mempool_ops_enqueue_bulk(mp, cache_objs, cache->len);
> -		cache->len = n;
> +		__rte_assume(cache->len > cache->size / 2);
> +		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[0], cache->size / 2);
> +		rte_memcpy(&cache->objs[0], &cache->objs[cache->size / 2],
> +				sizeof(void *) * (cache->len - cache->size / 2));

Does this code properly handle the case where cache->size is 1? The
operations with cache->size / 2 would result in 0, so could this code
flush 0 objects to the backend via rte_mempool_ops_enqueue_bulk()?

Also, when cache->size / 2 is 0, does the rte_memcpy call become a no-op
or could it cause issues with zero-length copies?

> +		cache_objs = &cache->objs[cache->len - cache->size / 2];
> +		cache->len = cache->len - cache->size / 2 + n;

In the expression cache->len - cache->size / 2 + n, does the code
guarantee that cache->len + n - cache->size / 2 stays within the bounds
of the objs[] array, specifically less than RTE_MEMPOOL_CACHE_MAX_SIZE?

>  	} else {
>  		/* The request itself is too big for the cache. */
>  		goto driver_enqueue_stats_incremented;

[ ... ]

> @@ -1553,7 +1568,7 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table,
>  	/* The cache is a stack, so copy will be in reverse order. */
>  	cache_objs = &cache->objs[cache->len];
>  
> -	__rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2);
> +	__rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE);
>  	if (likely(n <= cache->len)) {
>  		/* The entire request can be satisfied from the cache. */
>  		RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);

[ ... ]

> @@ -1577,13 +1592,13 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table,
>  	for (index = 0; index < len; index++)
>  		*obj_table++ = *--cache_objs;
>  
> -	/* Dequeue below would overflow mem allocated for cache? */
> -	if (unlikely(remaining > RTE_MEMPOOL_CACHE_MAX_SIZE))
> +	/* Dequeue below would exceed the cache bounce buffer limit? */
> +	__rte_assume(cache->size / 2 <= RTE_MEMPOOL_CACHE_MAX_SIZE / 2);
> +	if (unlikely(remaining > cache->size / 2))
>  		goto driver_dequeue;
>  
> -	/* Fill the cache from the backend; fetch size + remaining objects. */
> -	ret = rte_mempool_ops_dequeue_bulk(mp, cache->objs,
> -			cache->size + remaining);
> +	/* Fill the cache from the backend; fetch (size / 2) objects. */
> +	ret = rte_mempool_ops_dequeue_bulk(mp, cache->objs, cache->size / 2);
>  	if (unlikely(ret < 0)) {
>  		/*
>  		 * We are buffer constrained, and not able to fetch all that.
> @@ -1597,10 +1612,11 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table,
>  	RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
>  	RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
>  
> -	__rte_assume(cache->size <= RTE_MEMPOOL_CACHE_MAX_SIZE);
> -	__rte_assume(remaining <= RTE_MEMPOOL_CACHE_MAX_SIZE);
> -	cache_objs = &cache->objs[cache->size + remaining];
> -	cache->len = cache->size;
> +	__rte_assume(cache->size / 2 <= RTE_MEMPOOL_CACHE_MAX_SIZE / 2);
> +	__rte_assume(remaining <= RTE_MEMPOOL_CACHE_MAX_SIZE / 2);
> +	__rte_assume(remaining <= cache->size / 2);
> +	cache_objs = &cache->objs[cache->size / 2];
> +	cache->len = cache->size / 2 - remaining;

Does this code handle the edge case where remaining equals cache->size / 2
correctly? In that scenario, cache->len becomes 0, and the subsequent loop
copies remaining objects. Can this result in accessing cache_objs before
the start of the objs[] array via the pre-decrement operation in the loop?

>  	for (index = 0; index < remaining; index++)
>  		*obj_table++ = *--cache_objs;


More information about the test-report mailing list