[PATCH v4 3/5] graph: add stats for node specific errors

Kiran Kumar Kokkilagadda kirankumark at marvell.com
Thu Aug 22 08:37:48 CEST 2024



> -----Original Message-----
> From: pbhagavatula at marvell.com <pbhagavatula at marvell.com>
> Sent: Friday, August 16, 2024 8:39 PM
> To: Jerin Jacob <jerinj at marvell.com>; Nithin Kumar Dabilpuram
> <ndabilpuram at marvell.com>; Kiran Kumar Kokkilagadda
> <kirankumark at marvell.com>; zhirun.yan at intel.com; Zhirun Yan
> <yanzhirun_163 at 163.com>
> Cc: dev at dpdk.org; Pavan Nikhilesh Bhagavatula <pbhagavatula at marvell.com>
> Subject: [PATCH v4 3/5] graph: add stats for node specific errors
> 
> From: Pavan Nikhilesh <pbhagavatula at marvell.com>
> 
> Add support for retrieving/printing stats for node specific errors using
> rte_graph_cluster_stats_get().
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula at marvell.com>
> ---
Acked-by: Kiran Kumar Kokkilagadda <kirankumark at marvell.com>

>  lib/graph/graph_stats.c | 79 ++++++++++++++++++++++++++++++++++++++++-
>  lib/graph/rte_graph.h   |  4 +++
>  2 files changed, 82 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/graph/graph_stats.c b/lib/graph/graph_stats.c
> index d71451a17b..1ac0a6797f 100644
> --- a/lib/graph/graph_stats.c
> +++ b/lib/graph/graph_stats.c
> @@ -121,6 +121,25 @@ print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat, bool dispat
>  	}
>  }
> 
> +static inline void
> +print_err(FILE *f, const struct rte_graph_cluster_node_stats *stat, bool dispatch)
> +{
> +	int i;
> +
> +	if (dispatch) {
> +		for (i = 0; i < stat->node_error_cntrs; i++)
> +			fprintf(f,
> +				"|\t%-24s|%15s|%-15" PRIu64 "|%15s|%15s|%15s|%15s|%15s|%11.4s|\n",
> +				stat->node_error_desc[i], "", stat->node_error_count[i], "", "", "",
> +				"", "", "");
> +	} else {
> +		for (i = 0; i < stat->node_error_cntrs; i++)
> +			fprintf(f, "|\t%-24s|%15s|%-15" PRIu64 "|%15s|%15.3s|%15.6s|%11.4s|\n",
> +				stat->node_error_desc[i], "", stat->node_error_count[i], "", "", "",
> +				"");
> +	}
> +}
> +
>  static int
>  graph_cluster_stats_cb(bool dispatch, bool is_first, bool is_last, void *cookie,
>  		       const struct rte_graph_cluster_node_stats *stat)
> @@ -129,8 +148,11 @@ graph_cluster_stats_cb(bool dispatch, bool is_first, bool is_last, void *cookie,
> 
>  	if (unlikely(is_first))
>  		print_banner(f, dispatch);
> -	if (stat->objs)
> +	if (stat->objs) {
>  		print_node(f, stat, dispatch);
> +		if (stat->node_error_cntrs)
> +			print_err(f, stat, dispatch);
> +	}
>  	if (unlikely(is_last)) {
>  		if (dispatch)
>  			boarder_model_dispatch();
> @@ -203,6 +225,7 @@ stats_mem_populate(struct rte_graph_cluster_stats **stats_in,
>  	struct cluster_node *cluster;
>  	struct rte_node *node;
>  	rte_node_t count;
> +	uint8_t i;
> 
>  	cluster = stats->clusters;
> 
> @@ -240,6 +263,36 @@ stats_mem_populate(struct rte_graph_cluster_stats **stats_in,
>  		SET_ERR_JMP(ENOENT, free, "Failed to find node %s in graph %s",
>  			    graph_node->node->name, graph->name);
>  	cluster->nodes[cluster->nb_nodes++] = node;
> +	if (graph_node->node->errs) {
> +		cluster->stat.node_error_cntrs = graph_node->node->errs->nb_errors;
> +		cluster->stat.node_error_count = rte_zmalloc_socket(
> +			NULL, sizeof(uint64_t) * graph_node->node->errs->nb_errors,
> +			RTE_CACHE_LINE_SIZE, stats->socket_id);
> +		if (cluster->stat.node_error_count == NULL)
> +			SET_ERR_JMP(ENOMEM, free, "Failed to allocate memory node %s graph %s",
> +				    graph_node->node->name, graph->name);
> +
> +		cluster->stat.node_error_desc = rte_zmalloc_socket(
> +			NULL, sizeof(RTE_NODE_ERROR_DESC_SIZE) * graph_node->node->errs->nb_errors,
> +			RTE_CACHE_LINE_SIZE, stats->socket_id);
> +		if (cluster->stat.node_error_desc == NULL) {
> +			rte_free(cluster->stat.node_error_count);
> +			SET_ERR_JMP(ENOMEM, free, "Failed to allocate memory node %s graph %s",
> +				    graph_node->node->name, graph->name);
> +		}
> +
> +		for (i = 0; i < cluster->stat.node_error_cntrs; i++) {
> +			if (rte_strscpy(cluster->stat.node_error_desc[i],
> +					graph_node->node->errs->err_desc[i],
> +					RTE_NODE_ERROR_DESC_SIZE) < 0) {
> +				rte_free(cluster->stat.node_error_count);
> +				rte_free(cluster->stat.node_error_desc);
> +				SET_ERR_JMP(E2BIG, free,
> +					    "Error description overflow node %s graph %s",
> +					    graph_node->node->name, graph->name);
> +			}
> +		}
> +	}
> 
>  	stats->sz += stats->cluster_node_size;
>  	stats->max_nodes++;
> @@ -388,6 +441,18 @@ rte_graph_cluster_stats_create(const struct rte_graph_cluster_stats_param *prm)
>  void
>  rte_graph_cluster_stats_destroy(struct rte_graph_cluster_stats *stat)
>  {
> +	struct cluster_node *cluster;
> +	rte_node_t count;
> +
> +	cluster = stat->clusters;
> +	for (count = 0; count < stat->max_nodes; count++) {
> +		if (cluster->stat.node_error_cntrs) {
> +			rte_free(cluster->stat.node_error_count);
> +			rte_free(cluster->stat.node_error_desc);
> +		}
> +
> +		cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
> +	}
>  	return rte_free(stat);
>  }
> 
> @@ -399,7 +464,10 @@ cluster_node_arregate_stats(struct cluster_node *cluster, bool dispatch)
>  	uint64_t sched_objs = 0, sched_fail = 0;
>  	struct rte_node *node;
>  	rte_node_t count;
> +	uint64_t *err;
> +	uint8_t i;
> 
> +	memset(stat->node_error_count, 0, sizeof(uint64_t) * stat->node_error_cntrs);
>  	for (count = 0; count < cluster->nb_nodes; count++) {
>  		node = cluster->nodes[count];
> 
> @@ -412,6 +480,12 @@ cluster_node_arregate_stats(struct cluster_node *cluster, bool dispatch)
>  		objs += node->total_objs;
>  		cycles += node->total_cycles;
>  		realloc_count += node->realloc_count;
> +
> +		if (node->err_off == 0)
> +			continue;
> +		err = RTE_PTR_ADD(node, node->err_off);
> +		for (i = 0; i < stat->node_error_cntrs; i++)
> +			stat->node_error_count[i] += err[i];
>  	}
> 
>  	stat->calls = calls;
> @@ -464,6 +538,7 @@ rte_graph_cluster_stats_reset(struct rte_graph_cluster_stats *stat)
>  {
>  	struct cluster_node *cluster;
>  	rte_node_t count;
> +	uint8_t i;
> 
>  	cluster = stat->clusters;
> 
> @@ -479,6 +554,8 @@ rte_graph_cluster_stats_reset(struct rte_graph_cluster_stats *stat)
>  		node->prev_objs = 0;
>  		node->prev_cycles = 0;
>  		node->realloc_count = 0;
> +		for (i = 0; i < node->node_error_cntrs; i++)
> +			node->node_error_count[i] = 0;
>  		cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
>  	}
>  }
> diff --git a/lib/graph/rte_graph.h b/lib/graph/rte_graph.h
> index b28143d737..12b6461cf5 100644
> --- a/lib/graph/rte_graph.h
> +++ b/lib/graph/rte_graph.h
> @@ -223,6 +223,10 @@ struct __rte_cache_aligned rte_graph_cluster_node_stats {
> 
>  	uint64_t realloc_count; /**< Realloc count. */
> 
> +	uint8_t node_error_cntrs;			   /**< Number of Node error counters. */
> +	char (*node_error_desc)[RTE_NODE_ERROR_DESC_SIZE]; /**< Names of the Node error counters. */
> +	uint64_t *node_error_count;			   /**< Total error count per each error. */
> +
>  	rte_node_t id;	/**< Node identifier of stats. */
>  	uint64_t hz;	/**< Cycles per seconds. */
>  	char name[RTE_NODE_NAMESIZE];	/**< Name of the node. */
> --
> 2.25.1



More information about the dev mailing list