[dpdk-dev] [PATCH v2] net/failsafe: improve stats accuracy

Matan Azrad matan at mellanox.com
Thu Oct 19 16:31:54 CEST 2017


The stats_get API was changed to signal a potential failure to read
stats. Furthermore, some PMDs are able to provide statistics even after
a removal event has occurred.

Considering this, the fail-safe can try to access the latest statistics
of a PMD to improve statistics accuracy.

Attempt a final statistics read at removal time; if that fails, fall
back to the latest recorded snapshot.

Signed-off-by: Matan Azrad <matan at mellanox.com>
---
 drivers/net/failsafe/failsafe_ether.c   | 19 +++++++++++++++++--
 drivers/net/failsafe/failsafe_ops.c     | 10 ++++++++--
 drivers/net/failsafe/failsafe_private.h |  7 ++++++-
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index f4db423..df38360 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -35,6 +35,7 @@
 
 #include <rte_flow.h>
 #include <rte_flow_driver.h>
+#include <rte_cycles.h>
 
 #include "failsafe_private.h"
 
@@ -312,9 +313,23 @@
 static void
 fs_dev_stats_save(struct sub_device *sdev)
 {
+	struct rte_eth_stats stats;
+	int err;
+
+	/* Attempt to read current stats. */
+	err = rte_eth_stats_get(PORT_ID(sdev), &stats);
+	if (err) {
+		uint64_t cycles = sdev->stats_snapshot.cycles;
+
+		WARN("Could not access latest statistics from sub-device %d.\n",
+			 SUB_ID(sdev));
+		if (cycles != 0)
+			WARN("Using latest snapshot taken before %lu seconds.\n",
+				 (rte_rdtsc() - cycles) / rte_get_tsc_hz());
+	}
 	failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator,
-			&sdev->stats_snapshot);
-	memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
+			err ? &sdev->stats_snapshot.stats : &stats);
+	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
 }
 
 static inline int
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index d360965..818f12d 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -38,6 +38,7 @@
 #include <rte_ethdev.h>
 #include <rte_malloc.h>
 #include <rte_flow.h>
+#include <rte_cycles.h>
 
 #include "failsafe_private.h"
 
@@ -592,13 +593,18 @@
 
 	rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
-		ret = rte_eth_stats_get(PORT_ID(sdev), &sdev->stats_snapshot);
+		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
+		uint64_t *cycles = &sdev->stats_snapshot.cycles;
+
+		ret = rte_eth_stats_get(PORT_ID(sdev), snapshot);
 		if (ret) {
 			ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
 				  i, ret);
+			*cycles = 0;
 			return ret;
 		}
-		failsafe_stats_increment(stats, &sdev->stats_snapshot);
+		*cycles = rte_rdtsc();
+		failsafe_stats_increment(stats, snapshot);
 	}
 	return 0;
 }
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index d343ebf..1df52f4 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -93,6 +93,11 @@ enum dev_state {
 	DEV_STARTED,
 };
 
+struct fs_stats {
+	struct rte_eth_stats stats;
+	uint64_t cycles;
+};
+
 struct sub_device {
 	/* Exhaustive DPDK device description */
 	struct rte_devargs devargs;
@@ -103,7 +108,7 @@ struct sub_device {
 	/* Device state machine */
 	enum dev_state state;
 	/* Last stats snapshot passed to user */
-	struct rte_eth_stats stats_snapshot;
+	struct fs_stats stats_snapshot;
 	/* Some device are defined as a command line */
 	char *cmdline;
 	/* fail-safe device backreference */
-- 
1.8.3.1



More information about the dev mailing list