[PATCH v4 3/5] graph: add stats for node specific errors
Kiran Kumar Kokkilagadda
kirankumark at marvell.com
Thu Aug 22 08:37:48 CEST 2024
> -----Original Message-----
> From: pbhagavatula at marvell.com <pbhagavatula at marvell.com>
> Sent: Friday, August 16, 2024 8:39 PM
> To: Jerin Jacob <jerinj at marvell.com>; Nithin Kumar Dabilpuram
> <ndabilpuram at marvell.com>; Kiran Kumar Kokkilagadda
> <kirankumark at marvell.com>; zhirun.yan at intel.com; Zhirun Yan
> <yanzhirun_163 at 163.com>
> Cc: dev at dpdk.org; Pavan Nikhilesh Bhagavatula <pbhagavatula at marvell.com>
> Subject: [PATCH v4 3/5] graph: add stats for node specific errors
>
> From: Pavan Nikhilesh <pbhagavatula at marvell.com>
>
> Add support for retrieving/printing stats for node specific errors using
> rte_graph_cluster_stats_get().
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula at marvell.com>
> ---
Acked-by: Kiran Kumar Kokkilagadda <kirankumark at marvell.com>
> lib/graph/graph_stats.c | 79 ++++++++++++++++++++++++++++++++++++++++-
> lib/graph/rte_graph.h | 4 +++
> 2 files changed, 82 insertions(+), 1 deletion(-)
>
> diff --git a/lib/graph/graph_stats.c b/lib/graph/graph_stats.c index
> d71451a17b..1ac0a6797f 100644
> --- a/lib/graph/graph_stats.c
> +++ b/lib/graph/graph_stats.c
> @@ -121,6 +121,25 @@ print_node(FILE *f, const struct
> rte_graph_cluster_node_stats *stat, bool dispat
> }
> }
>
> +static inline void
> +print_err(FILE *f, const struct rte_graph_cluster_node_stats *stat,
> +bool dispatch) {
> + int i;
> +
> + if (dispatch) {
> + for (i = 0; i < stat->node_error_cntrs; i++)
> + fprintf(f,
> + "|\t%-24s|%15s|%-15" PRIu64
> "|%15s|%15s|%15s|%15s|%15s|%11.4s|\n",
> + stat->node_error_desc[i], "", stat-
> >node_error_count[i], "", "", "",
> + "", "", "");
> + } else {
> + for (i = 0; i < stat->node_error_cntrs; i++)
> + fprintf(f, "|\t%-24s|%15s|%-15" PRIu64
> "|%15s|%15.3s|%15.6s|%11.4s|\n",
> + stat->node_error_desc[i], "", stat-
> >node_error_count[i], "", "", "",
> + "");
> + }
> +}
> +
> static int
> graph_cluster_stats_cb(bool dispatch, bool is_first, bool is_last, void *cookie,
> const struct rte_graph_cluster_node_stats *stat) @@ -129,8
> +148,11 @@ graph_cluster_stats_cb(bool dispatch, bool is_first, bool is_last, void
> *cookie,
>
> if (unlikely(is_first))
> print_banner(f, dispatch);
> - if (stat->objs)
> + if (stat->objs) {
> print_node(f, stat, dispatch);
> + if (stat->node_error_cntrs)
> + print_err(f, stat, dispatch);
> + }
> if (unlikely(is_last)) {
> if (dispatch)
> boarder_model_dispatch();
> @@ -203,6 +225,7 @@ stats_mem_populate(struct rte_graph_cluster_stats
> **stats_in,
> struct cluster_node *cluster;
> struct rte_node *node;
> rte_node_t count;
> + uint8_t i;
>
> cluster = stats->clusters;
>
> @@ -240,6 +263,36 @@ stats_mem_populate(struct rte_graph_cluster_stats
> **stats_in,
> SET_ERR_JMP(ENOENT, free, "Failed to find node %s in graph
> %s",
> graph_node->node->name, graph->name);
> cluster->nodes[cluster->nb_nodes++] = node;
> + if (graph_node->node->errs) {
> + cluster->stat.node_error_cntrs = graph_node->node->errs-
> >nb_errors;
> + cluster->stat.node_error_count = rte_zmalloc_socket(
> + NULL, sizeof(uint64_t) * graph_node->node->errs-
> >nb_errors,
> + RTE_CACHE_LINE_SIZE, stats->socket_id);
> + if (cluster->stat.node_error_count == NULL)
> + SET_ERR_JMP(ENOMEM, free, "Failed to allocate
> memory node %s graph %s",
> + graph_node->node->name, graph->name);
> +
> + cluster->stat.node_error_desc = rte_zmalloc_socket(
> + NULL, sizeof(RTE_NODE_ERROR_DESC_SIZE) *
> graph_node->node->errs->nb_errors,
> + RTE_CACHE_LINE_SIZE, stats->socket_id);
> + if (cluster->stat.node_error_desc == NULL) {
> + rte_free(cluster->stat.node_error_count);
> + SET_ERR_JMP(ENOMEM, free, "Failed to allocate
> memory node %s graph %s",
> + graph_node->node->name, graph->name);
> + }
> +
> + for (i = 0; i < cluster->stat.node_error_cntrs; i++) {
> + if (rte_strscpy(cluster->stat.node_error_desc[i],
> + graph_node->node->errs->err_desc[i],
> + RTE_NODE_ERROR_DESC_SIZE) < 0) {
> + rte_free(cluster->stat.node_error_count);
> + rte_free(cluster->stat.node_error_desc);
> + SET_ERR_JMP(E2BIG, free,
> + "Error description overflow node %s
> graph %s",
> + graph_node->node->name, graph-
> >name);
> + }
> + }
> + }
>
> stats->sz += stats->cluster_node_size;
> stats->max_nodes++;
> @@ -388,6 +441,18 @@ rte_graph_cluster_stats_create(const struct
> rte_graph_cluster_stats_param *prm) void
> rte_graph_cluster_stats_destroy(struct rte_graph_cluster_stats *stat) {
> + struct cluster_node *cluster;
> + rte_node_t count;
> +
> + cluster = stat->clusters;
> + for (count = 0; count < stat->max_nodes; count++) {
> + if (cluster->stat.node_error_cntrs) {
> + rte_free(cluster->stat.node_error_count);
> + rte_free(cluster->stat.node_error_desc);
> + }
> +
> + cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
> + }
> return rte_free(stat);
> }
>
> @@ -399,7 +464,10 @@ cluster_node_arregate_stats(struct cluster_node
> *cluster, bool dispatch)
> uint64_t sched_objs = 0, sched_fail = 0;
> struct rte_node *node;
> rte_node_t count;
> + uint64_t *err;
> + uint8_t i;
>
> + memset(stat->node_error_count, 0, sizeof(uint64_t) *
> +stat->node_error_cntrs);
> for (count = 0; count < cluster->nb_nodes; count++) {
> node = cluster->nodes[count];
>
> @@ -412,6 +480,12 @@ cluster_node_arregate_stats(struct cluster_node
> *cluster, bool dispatch)
> objs += node->total_objs;
> cycles += node->total_cycles;
> realloc_count += node->realloc_count;
> +
> + if (node->err_off == 0)
> + continue;
> + err = RTE_PTR_ADD(node, node->err_off);
> + for (i = 0; i < stat->node_error_cntrs; i++)
> + stat->node_error_count[i] += err[i];
> }
>
> stat->calls = calls;
> @@ -464,6 +538,7 @@ rte_graph_cluster_stats_reset(struct
> rte_graph_cluster_stats *stat) {
> struct cluster_node *cluster;
> rte_node_t count;
> + uint8_t i;
>
> cluster = stat->clusters;
>
> @@ -479,6 +554,8 @@ rte_graph_cluster_stats_reset(struct
> rte_graph_cluster_stats *stat)
> node->prev_objs = 0;
> node->prev_cycles = 0;
> node->realloc_count = 0;
> + for (i = 0; i < node->node_error_cntrs; i++)
> + node->node_error_count[i] = 0;
> cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
> }
> }
> diff --git a/lib/graph/rte_graph.h b/lib/graph/rte_graph.h index
> b28143d737..12b6461cf5 100644
> --- a/lib/graph/rte_graph.h
> +++ b/lib/graph/rte_graph.h
> @@ -223,6 +223,10 @@ struct __rte_cache_aligned
> rte_graph_cluster_node_stats {
>
> uint64_t realloc_count; /**< Realloc count. */
>
> + uint8_t node_error_cntrs; /**< Number of
> Node error counters. */
> + char (*node_error_desc)[RTE_NODE_ERROR_DESC_SIZE]; /**< Names
> of the Node error counters. */
> + uint64_t *node_error_count; /**< Total error
> count per each error. */
> +
> rte_node_t id; /**< Node identifier of stats. */
> uint64_t hz; /**< Cycles per seconds. */
> char name[RTE_NODE_NAMESIZE]; /**< Name of the node. */
> --
> 2.25.1
More information about the dev mailing list