[dpdk-dev] [PATCH v3 2/2] lib/mempool: distinguish debug counters from cache and pool

Olivier Matz olivier.matz at 6wind.com
Wed Apr 21 18:29:38 CEST 2021
Previous message (by thread): [dpdk-dev] [PATCH v3 2/2] lib/mempool: distinguish debug counters from cache and pool
Next message (by thread): [dpdk-dev] [PATCH v3 2/2] lib/mempool: distinguish debug counters from cache and pool
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Hi Dharmik,

Please see some comments below.

On Mon, Apr 19, 2021 at 07:08:00PM -0500, Dharmik Thakkar wrote:
> From: Joyce Kong <joyce.kong at arm.com>
> 
> If cache is enabled, objects will be retrieved/put from/to cache,
> subsequently from/to the common pool. Now the debug stats calculate
> the objects retrieved/put from/to cache and pool together, it is
> better to distinguish them.
> 
> Signed-off-by: Joyce Kong <joyce.kong at arm.com>
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar at arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang at arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
> ---
>  lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++
>  lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++----------
>  2 files changed, 57 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
> index afb1239c8d48..339f14455624 100644
> --- a/lib/librte_mempool/rte_mempool.c
> +++ b/lib/librte_mempool/rte_mempool.c
> @@ -1244,6 +1244,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>  	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>  		sum.put_bulk += mp->stats[lcore_id].put_bulk;
>  		sum.put_objs += mp->stats[lcore_id].put_objs;
> +		sum.put_common_pool_bulk +=
> +			mp->stats[lcore_id].put_common_pool_bulk;
> +		sum.put_common_pool_objs +=
> +			mp->stats[lcore_id].put_common_pool_objs;
> +		sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk;
> +		sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs;
> +		sum.get_common_pool_bulk +=
> +			mp->stats[lcore_id].get_common_pool_bulk;
> +		sum.get_common_pool_objs +=
> +			mp->stats[lcore_id].get_common_pool_objs;
> +		sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk;
> +		sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs;
>  		sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
>  		sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
>  		sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
> @@ -1254,6 +1266,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>  	fprintf(f, "  stats:\n");
>  	fprintf(f, "    put_bulk=%"PRIu64"\n", sum.put_bulk);
>  	fprintf(f, "    put_objs=%"PRIu64"\n", sum.put_objs);
> +	fprintf(f, "    put_common_pool_bulk=%"PRIu64"\n",
> +						sum.put_common_pool_bulk);
> +	fprintf(f, "    put_common_pool_objs=%"PRIu64"\n",
> +						sum.put_common_pool_objs);
> +	fprintf(f, "    put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk);
> +	fprintf(f, "    put_cache_objs=%"PRIu64"\n", sum.put_cache_objs);
> +	fprintf(f, "    get_common_pool_bulk=%"PRIu64"\n",
> +						sum.get_common_pool_bulk);
> +	fprintf(f, "    get_common_pool_objs=%"PRIu64"\n",
> +						sum.get_common_pool_objs);
> +	fprintf(f, "    get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk);
> +	fprintf(f, "    get_cache_objs=%"PRIu64"\n", sum.get_cache_objs);
>  	fprintf(f, "    get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
>  	fprintf(f, "    get_success_objs=%"PRIu64"\n", sum.get_success_objs);
>  	fprintf(f, "    get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index 848a19226149..0959f8a3f367 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -66,12 +66,20 @@ extern "C" {
>   * A structure that stores the mempool statistics (per-lcore).
>   */
>  struct rte_mempool_debug_stats {
> -	uint64_t put_bulk;         /**< Number of puts. */
> -	uint64_t put_objs;         /**< Number of objects successfully put. */
> -	uint64_t get_success_bulk; /**< Successful allocation number. */
> -	uint64_t get_success_objs; /**< Objects successfully allocated. */
> -	uint64_t get_fail_bulk;    /**< Failed allocation number. */
> -	uint64_t get_fail_objs;    /**< Objects that failed to be allocated. */
> +	uint64_t put_bulk;		  /**< Number of puts. */
> +	uint64_t put_objs;		  /**< Number of objects successfully put. */
> +	uint64_t put_common_pool_bulk;	  /**< Number of bulks enqueued in common pool. */
> +	uint64_t put_common_pool_objs;	  /**< Number of objects enqueued in common pool. */
> +	uint64_t put_cache_bulk;	  /**< Number of bulks enqueued in cache. */
> +	uint64_t put_cache_objs;	  /**< Number of objects enqueued in cache. */
> +	uint64_t get_common_pool_bulk;    /**< Number of bulks dequeued from common pool. */
> +	uint64_t get_common_pool_objs;	  /**< Number of objects dequeued from common pool. */
> +	uint64_t get_cache_bulk;	  /**< Number of bulks dequeued from cache. */
> +	uint64_t get_cache_objs;	  /**< Number of objects dequeued from cache. */
> +	uint64_t get_success_bulk;	  /**< Successful allocation number. */
> +	uint64_t get_success_objs;	  /**< Objects successfully allocated. */
> +	uint64_t get_fail_bulk;		  /**< Failed allocation number. */
> +	uint64_t get_fail_objs;		  /**< Objects that failed to be allocated. */

I missed it the first time, but this changes the size of the
rte_mempool_debug_stats structure. I think we don't care about this ABI
breakage because this structure is only defined if
RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc.

About the field themselves, I'm not certain that there is an added value
to have stats for cache gets and puts. My feeling is that the important
stat to monitor is the access to common pool, because it is the one that
highlights a possible performance impact (contention). The cache stats
are more or less equal to "success + fail - common". Moreover, it will
simplify the patch and avoid risks of mistakes.

What do you think?

>  	/** Successful allocation number of contiguous blocks. */
>  	uint64_t get_success_blks;
>  	/** Failed allocation number of contiguous blocks. */
> @@ -699,10 +707,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
>  		void **obj_table, unsigned n)
>  {
>  	struct rte_mempool_ops *ops;
> +	int ret;
>  
>  	rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n);
>  	ops = rte_mempool_get_ops(mp->ops_index);
> -	return ops->dequeue(mp, obj_table, n);
> +	ret = ops->dequeue(mp, obj_table, n);
> +	if (ret == 0) {
> +		__MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1);
> +		__MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n);
> +		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
> +		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
> +	}
> +	return ret;
>  }
>  
>  /**
> @@ -749,6 +765,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
>  {
>  	struct rte_mempool_ops *ops;
>  
> +	__MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1);
> +	__MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n);
>  	rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n);
>  	ops = rte_mempool_get_ops(mp->ops_index);
>  	return ops->enqueue(mp, obj_table, n);
> @@ -1297,14 +1315,18 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
>  
>  	/* Add elements back into the cache */
>  	rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
> -
>  	cache->len += n;
>  
> +	__MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1);
> +
>  	if (cache->len >= cache->flushthresh) {
> +		__MEMPOOL_STAT_ADD(mp, put_cache_objs,
> +				   n - (cache->len - cache->size));
>  		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
>  				cache->len - cache->size);
>  		cache->len = cache->size;
> -	}
> +	} else
> +		__MEMPOOL_STAT_ADD(mp, put_cache_objs, n);
>  

In case we keep cache stats, I'd add {} after the else to be consistent
with the if().

>  	return;
>  
> @@ -1438,8 +1460,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>  
>  	cache->len -= n;
>  
> -	__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
> -	__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
> +	__MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
> +	__MEMPOOL_STAT_ADD(mp, get_cache_objs, n);

In case we keep cache stats, I don't think we should remove get_success
stats increment. Else, the success stats will never be incremented when
retrieving objects from the cache.


>  
>  	return 0;
>  
> @@ -1451,9 +1473,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>  	if (ret < 0) {
>  		__MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
>  		__MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
> -	} else {
> -		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
> -		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>  	}
>  
>  	return ret;
> -- 
> 2.17.1
>
Previous message (by thread): [dpdk-dev] [PATCH v3 2/2] lib/mempool: distinguish debug counters from cache and pool
Next message (by thread): [dpdk-dev] [PATCH v3 2/2] lib/mempool: distinguish debug counters from cache and pool
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the dev mailing list