[dpdk-dev] [PATCH v3 2/2] net/mlx5: support e-switch flow count action

Mordechay Haimovsky motih at mellanox.com
Wed Oct 17 01:50:21 CEST 2018


This commit adds support for configuring flows destined to the mlx5
e-switch with a 'count' action and for querying these counts at runtime.

Each flow rule configured by the mlx5 driver is implicitly assigned
flow counters. These counters can be retrieved when querying the
flow rule via Netlink; they are found in each flow action section
of the reply. Hence, supporting the 'count' action in the flow
configuration command is straightforward: when transposing the
command to a tc Netlink message, we simply ignore the action instead
of rejecting it.
On the 'flow query count' side, the command now uses the tc Netlink
query command to retrieve the values of the flow counters.

Signed-off-by: Moti Haimovsky <motih at mellanox.com>
---
v3:
 * Rebase on top of
   d80c8167c4fe ("net/mlx5: fix compilation issue on ARM SOC")
 * Code modifications according to review by Shahaf S.
   see message ID: 1539263057-16678-3-git-send-email-motih at mellanox.com

v2:
 * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow infrastructure")
---
 drivers/net/mlx5/mlx5_flow.c       |  99 +++----
 drivers/net/mlx5/mlx5_flow.h       |  12 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   1 +
 drivers/net/mlx5/mlx5_flow_tcf.c   | 515 ++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_flow_verbs.c |  87 +++++++
 5 files changed, 642 insertions(+), 72 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index bd70fce..dc68e46 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1653,6 +1653,19 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 {
 }
 
+int
+mlx5_flow_null_query(struct rte_eth_dev *dev __rte_unused,
+		     struct rte_flow *flow __rte_unused,
+		     const struct rte_flow_action *actions __rte_unused,
+		     void *data __rte_unused,
+		     struct rte_flow_error *error)
+{
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "counters are not available");
+}
+
 /* Void driver to protect from null pointer reference. */
 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
 	.validate = flow_null_validate,
@@ -1661,6 +1674,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 	.apply = flow_null_apply,
 	.remove = flow_null_remove,
 	.destroy = flow_null_destroy,
+	.query = mlx5_flow_null_query,
 };
 
 /**
@@ -2344,59 +2358,25 @@ struct rte_flow *
 }
 
 /**
- * Query flow counter.
- *
- * @param flow
- *   Pointer to the flow.
+ * Query a flow.
  *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ * @see rte_flow_query()
+ * @see rte_flow_ops
  */
 static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
-		      void *data __rte_unused,
-		      struct rte_flow_error *error)
+flow_drv_query(struct rte_eth_dev *dev,
+	       struct rte_flow *flow,
+	       const struct rte_flow_action *actions,
+	       void *data,
+	       struct rte_flow_error *error)
 {
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
-		struct rte_flow_query_count *qc = data;
-		uint64_t counters[2] = {0, 0};
-		struct ibv_query_counter_set_attr query_cs_attr = {
-			.cs = flow->counter->cs,
-			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
-		};
-		struct ibv_counter_set_data query_out = {
-			.out = counters,
-			.outlen = 2 * sizeof(uint64_t),
-		};
-		int err = mlx5_glue->query_counter_set(&query_cs_attr,
-						       &query_out);
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type ftype = flow->drv_type;
 
-		if (err)
-			return rte_flow_error_set
-				(error, err,
-				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				 NULL,
-				 "cannot read counter");
-		qc->hits_set = 1;
-		qc->bytes_set = 1;
-		qc->hits = counters[0] - flow->counter->hits;
-		qc->bytes = counters[1] - flow->counter->bytes;
-		if (qc->reset) {
-			flow->counter->hits = counters[0];
-			flow->counter->bytes = counters[1];
-		}
-		return 0;
-	}
-	return rte_flow_error_set(error, EINVAL,
-				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				  NULL,
-				  "flow does not have counter");
-#endif
-	return rte_flow_error_set(error, ENOTSUP,
-				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				  NULL,
-				  "counters are not available");
+	assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(ftype);
+
+	return fops->query(dev, flow, actions, data, error);
 }
 
 /**
@@ -2406,30 +2386,17 @@ struct rte_flow *
  * @see rte_flow_ops
  */
 int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
 		struct rte_flow *flow,
 		const struct rte_flow_action *actions,
 		void *data,
 		struct rte_flow_error *error)
 {
-	int ret = 0;
+	int ret;
 
-	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-		switch (actions->type) {
-		case RTE_FLOW_ACTION_TYPE_VOID:
-			break;
-		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_query_count(flow, data, error);
-			break;
-		default:
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ACTION,
-						  actions,
-						  "action not supported");
-		}
-		if (ret < 0)
-			return ret;
-	}
+	ret = flow_drv_query(dev, flow, actions, data, error);
+	if (ret < 0)
+		return ret;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 5cb05ba..49fa1cb 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -239,7 +239,6 @@ struct rte_flow {
 	struct rte_flow_action_rss rss;/**< RSS context. */
 	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
 	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
-	void *nl_flow; /**< Netlink flow buffer if relevant. */
 	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
 	/**< Device flows that are part of the flow. */
 	uint32_t actions; /**< Bit-fields which mark all detected actions. */
@@ -265,6 +264,11 @@ typedef void (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
 				   struct rte_flow *flow);
 typedef void (*mlx5_flow_destroy_t)(struct rte_eth_dev *dev,
 				    struct rte_flow *flow);
+typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
+				 struct rte_flow *flow,
+				 const struct rte_flow_action *actions,
+				 void *data,
+				 struct rte_flow_error *error);
 struct mlx5_flow_driver_ops {
 	mlx5_flow_validate_t validate;
 	mlx5_flow_prepare_t prepare;
@@ -272,10 +276,16 @@ struct mlx5_flow_driver_ops {
 	mlx5_flow_apply_t apply;
 	mlx5_flow_remove_t remove;
 	mlx5_flow_destroy_t destroy;
+	mlx5_flow_query_t query;
 };
 
 /* mlx5_flow.c */
 
+int mlx5_flow_null_query(struct rte_eth_dev *dev,
+			 struct rte_flow *flow,
+			 const struct rte_flow_action *actions,
+			 void *data,
+			 struct rte_flow_error *error);
 uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int tunnel,
 				     uint32_t layer_types,
 				     uint64_t hash_fields);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index becbc57..188aca2 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1370,6 +1370,7 @@
 	.apply = flow_dv_apply,
 	.remove = flow_dv_remove,
 	.destroy = flow_dv_destroy,
+	.query = mlx5_flow_null_query,
 };
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index f232373..b9f23ea 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
 #include <linux/pkt_cls.h>
@@ -231,6 +232,10 @@ struct tc_pedit_sel {
 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
 #endif
 
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
 /**
  * Structure for holding netlink context.
  * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
@@ -241,7 +246,16 @@ struct mlx5_flow_tcf_context {
 	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
 	uint32_t seq; /* Message sequence number. */
 	uint32_t buf_size; /* Message buffer size. */
-	uint8_t *buf; /* Message buffer. */
+	uint8_t *buf;
+	/* Message buffer (used for receiving large netlink messages). */
+};
+
+/** Structure used when extracting the values of a flow's counters
+ * from a netlink message.
+ */
+struct flow_tcf_stats_basic {
+	bool valid;
+	struct gnet_stats_basic counters;
 };
 
 /** Empty masks for known item types. */
@@ -356,6 +370,51 @@ struct pedit_parser {
 	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
 };
 
+/**
+ * Create space for using the implicitly created TC flow counter.
+ *
+ * The counter starts with a reference count of one, is not shared, and
+ * is not added to any counter list.
+ *
+ * @return
+ *   A pointer to the counter data structure, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_tcf_counter_new(void)
+{
+	struct mlx5_flow_counter *cnt;
+
+	struct mlx5_flow_counter tmpl = {
+		.ref_cnt = 1,
+		.shared = 0,
+		.id = 0,
+		.cs = NULL,
+		.hits = 0,
+		.bytes = 0,
+	};
+	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+	if (!cnt) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	*cnt = tmpl;
+	/* Implicit counter, do not add to list. */
+	return cnt;
+}
+
+/**
+ * Release a flow counter.
+ *
+ * @param[in] counter
+ *   Pointer to the counter handler.
+ */
+static void
+flow_tcf_counter_release(struct mlx5_flow_counter *counter)
+{
+	if (--counter->ref_cnt == 0)
+		rte_free(counter);
+}
 
 /**
  * Set pedit key of transport (TCP/UDP) port value
@@ -1067,6 +1126,8 @@ struct pedit_parser {
 		case RTE_FLOW_ACTION_TYPE_DROP:
 			current_action_flag = MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
 			break;
@@ -1342,6 +1403,8 @@ struct pedit_parser {
 				SZ_NLATTR_TYPE_OF(struct tc_gact);
 			flags |= MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
 			goto action_of_vlan;
@@ -1477,6 +1540,38 @@ struct pedit_parser {
 }
 
 /**
+ * Make adjustments for supporting count actions.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 On success else a negative errno value is returned and rte_errno is set.
+ */
+static int
+flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
+				  struct mlx5_flow *dev_flow,
+				  struct rte_flow_error *error)
+{
+	struct rte_flow *flow = dev_flow->flow;
+
+	if (!flow->counter) {
+		flow->counter = flow_tcf_counter_new();
+		if (!flow->counter)
+			return rte_flow_error_set(error, rte_errno,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  NULL,
+						  "cannot get counter"
+						  " context.");
+	}
+	return 0;
+}
+
+/**
  * Translate flow for Linux TC flower and construct Netlink message.
  *
  * @param[in] priv
@@ -1533,6 +1628,7 @@ struct pedit_parser {
 	struct nlattr *na_vlan_id = NULL;
 	struct nlattr *na_vlan_priority = NULL;
 	uint64_t item_flags = 0;
+	int ret;
 
 	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
 						PTOI_TABLE_SZ_MAX(dev)));
@@ -1875,6 +1971,16 @@ struct pedit_parser {
 			mnl_attr_nest_end(nlh, na_act);
 			mnl_attr_nest_end(nlh, na_act_index);
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			/*
+			 * Driver adds the count action implicitly for
+			 * each rule it creates.
+			 */
+			ret = flow_tcf_translate_action_count(dev,
+							      dev_flow, error);
+			if (ret < 0)
+				return ret;
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			conf.of_push_vlan = NULL;
 			vlan_act = TCA_VLAN_ACT_POP;
@@ -2055,6 +2161,10 @@ struct pedit_parser {
 
 	if (!flow)
 		return;
+	if (flow->counter) {
+		flow_tcf_counter_release(flow->counter);
+		flow->counter = NULL;
+	}
 	dev_flow = LIST_FIRST(&flow->dev_flows);
 	if (!dev_flow)
 		return;
@@ -2091,6 +2201,400 @@ struct pedit_parser {
 	rte_free(dev_flow);
 }
 
+/**
+ * Parse rtnetlink message attributes filling the attribute table with the info
+ * retrieved.
+ *
+ * @param[out] tb
+ *   Attribute table to be filled.
+ * @param[in] max
+ *   Maximum entry in the attribute table.
+ * @param[in] rta
+ *   The attributes section in the message to be parsed.
+ * @param[in] len
+ *   The length of the attributes section in the message.
+ */
+static void
+flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
+			 struct rtattr *rta, int len)
+{
+	unsigned short type;
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+	while (RTA_OK(rta, len)) {
+		type = rta->rta_type;
+		if (type <= max && !tb[type])
+			tb[type] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+}
+
+ /**
+  * Extract flow counters from flower action.
+  *
+  * @param rta
+  *   flower action stats properties in the Netlink message received.
+  * @param rta_type
+  *   The backward sequence of rta_types, as written in the attribute table,
+  *   we need to traverse in order to get to the requested object.
+  * @param idx
+  *   Current location in rta_type table.
+  * @param[out] data
+  *   data holding the count statistics of the rte_flow retrieved from
+  *   the message.
+  *
+  * @return
+  *   0 if data was found and retrieved, -1 otherwise.
+  */
+static int
+flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
+				       uint16_t rta_type[], int idx,
+				       struct gnet_stats_basic *data)
+{
+	struct rtattr *tbs[TCA_STATS_MAX + 1];
+
+	if (rta == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tbs, TCA_STATS_MAX,
+				 RTA_DATA(rta), RTA_PAYLOAD(rta));
+	switch (rta_type[idx]) {
+	case TCA_STATS_BASIC:
+		if (tbs[TCA_STATS_BASIC]) {
+			memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
+			       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+			       sizeof(*data)));
+			return 0;
+		}
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+ /**
+  * Parse flower single action retrieving the requested action attribute,
+  * if found.
+  *
+  * @param arg
+  *   flower action properties in the Netlink message received.
+  * @param rta_type
+  *   The backward sequence of rta_types, as written in the attribute table,
+  *   we need to traverse in order to get to the requested object.
+  * @param idx
+  *   Current location in rta_type table.
+  * @param[out] data
+  *   Count statistics retrieved from the message query.
+  *
+  * @return
+  *   0 if data was found and retrieved, -1 otherwise.
+  */
+static int
+flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
+				     uint16_t rta_type[], int idx, void *data)
+{
+	struct rtattr *tb[TCA_ACT_MAX + 1];
+
+	if (arg == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX,
+				 RTA_DATA(arg), RTA_PAYLOAD(arg));
+	if (tb[TCA_ACT_KIND] == NULL)
+		return -1;
+	switch (rta_type[idx]) {
+	case TCA_ACT_STATS:
+		if (tb[TCA_ACT_STATS])
+			return flow_tcf_nl_action_stats_parse_and_get
+					(tb[TCA_ACT_STATS],
+					 rta_type, --idx,
+					 (struct gnet_stats_basic *)data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+ /**
+  * Parse flower action section in the message retrieving the requested
+  * attribute from the first action that provides it.
+  *
+  * @param arg
+  *   flower actions section in the Netlink message received.
+  * @param rta_type
+  *   The backward sequence of rta_types, as written in the attribute table,
+  *   we need to traverse in order to get to the requested object.
+  * @param idx
+  *   Current location in rta_type table.
+  * @param[out] data
+  *   data retrieved from the message query.
+  *
+  * @return
+  *   0 if data was found and retrieved, -1 otherwise.
+  */
+static int
+flow_tcf_nl_action_parse_and_get(const struct rtattr *arg,
+				 uint16_t rta_type[], int idx, void *data)
+{
+	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+	int i;
+
+	if (arg == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
+				 RTA_DATA(arg), RTA_PAYLOAD(arg));
+	switch (rta_type[idx]) {
+	/*
+	 * flow counters are stored in the actions defined by the flow
+	 * and not in the flow itself, therefore we need to traverse the
+	 * flower chain of actions in search for them.
+	 *
+	 * Note that the index is not decremented here.
+	 */
+	case TCA_ACT_STATS:
+		for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
+			if (tb[i] &&
+			!flow_tcf_nl_parse_one_action_and_get(tb[i],
+							      rta_type,
+							      idx, data))
+				return 0;
+		}
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+ /**
+  * Parse flower classifier options in the message, retrieving the requested
+  * attribute if found.
+  *
+  * @param opt
+  *   flower section in the Netlink message received.
+  * @param rta_type
+  *   The backward sequence of rta_types, as written in the attribute table,
+  *   we need to traverse in order to get to the requested object.
+  * @param idx
+  *   Current location in rta_type table.
+  * @param[out] data
+  *   data retrieved from the message query.
+  *
+  * @return
+  *   0 if data was found and retrieved, -1 otherwise.
+  */
+static int
+flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
+			       uint16_t rta_type[], int idx, void *data)
+{
+	struct rtattr *tb[TCA_FLOWER_MAX + 1];
+
+	if (!opt || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_FLOWER_MAX,
+				 RTA_DATA(opt), RTA_PAYLOAD(opt));
+	switch (rta_type[idx]) {
+	case TCA_FLOWER_ACT:
+		if (tb[TCA_FLOWER_ACT])
+			return flow_tcf_nl_action_parse_and_get
+							(tb[TCA_FLOWER_ACT],
+							 rta_type, --idx, data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+ /**
+  * Parse Netlink reply on filter query, retrieving the flow counters.
+  *
+  * @param nlh
+  *   Message received from Netlink.
+  * @param rta_type
+  *   The backward sequence of rta_types, as written in the attribute table,
+  *   we need to traverse in order to get to the requested object.
+  * @param idx
+  *   Current location in rta_type table.
+  * @param[out] data
+  *   data retrieved from the message query.
+  *
+  * @return
+  *   0 if data was found and retrieved, -1 otherwise.
+  */
+static int
+flow_tcf_nl_filter_parse_and_get(const struct nlmsghdr *nlh,
+				 uint16_t rta_type[], int idx, void *data)
+{
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len = nlh->nlmsg_len;
+	struct rtattr *tb[TCA_MAX + 1];
+
+	if (idx < 0)
+		return -1;
+	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+	    nlh->nlmsg_type != RTM_GETTFILTER &&
+	    nlh->nlmsg_type != RTM_DELTFILTER)
+		return -1;
+	len -= NLMSG_LENGTH(sizeof(*t));
+	if (len < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+	/* Not a TC flower flow - bail out */
+	if (!tb[TCA_KIND] ||
+	    strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
+		return -1;
+	switch (rta_type[idx]) {
+	case TCA_OPTIONS:
+		if (tb[TCA_OPTIONS])
+			return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
+							      rta_type,
+							      --idx, data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * A callback to parse Netlink reply on TC flower query.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] data
+ *   Pointer to data area to be filled by the parsing routine.
+ *   Assumed to be a pointer to struct flow_tcf_stats_basic.
+ *
+ * @return
+ *   MNL_CB_OK value.
+ */
+static int
+flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
+{
+	/*
+	 * The backward sequence of rta_types to pass in order to get
+	 *  to the counters.
+	 */
+	uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
+				TCA_FLOWER_ACT, TCA_OPTIONS };
+	struct flow_tcf_stats_basic *sb_data = data;
+
+	if (!flow_tcf_nl_filter_parse_and_get(nlh, rta_type,
+					      ARRAY_SIZE(rta_type) - 1,
+					      (void *)&sb_data->counters))
+		sb_data->valid = true;
+	return MNL_CB_OK;
+}
+
+/**
+ * Query a TC flower rule for its statistics via netlink.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the sub flow.
+ * @param[out] data
+ *   data retrieved by the query.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_query_count(struct rte_eth_dev *dev,
+			  struct rte_flow *flow,
+			  void *data,
+			  struct rte_flow_error *error)
+{
+	struct flow_tcf_stats_basic sb_data = { 0 };
+	struct rte_flow_query_count *qc = data;
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+	struct mnl_socket *nl = ctx->nl;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+	uint32_t seq = priv->tcf_context->seq++;
+	ssize_t ret;
+	assert(qc);
+
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	if (!dev_flow->flow->counter)
+		goto notsup_exit;
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_GETTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+	nlh->nlmsg_seq = seq;
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+		goto error_exit;
+	do {
+		ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
+		if (ret <= 0)
+			break;
+		ret = mnl_cb_run(ctx->buf, ret, seq,
+				 mnl_socket_get_portid(nl),
+				 flow_tcf_nl_message_get_stats_basic,
+				 (void *)&sb_data);
+	} while (ret > 0);
+	/* Return the delta from last reset. */
+	if (sb_data.valid) {
+		/* Return the delta from last reset. */
+		qc->hits_set = 1;
+		qc->bytes_set = 1;
+		qc->hits = sb_data.counters.packets;
+		qc->hits -= flow->counter->hits;
+		qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
+		if (qc->reset) {
+			flow->counter->hits = sb_data.counters.packets;
+			flow->counter->bytes = sb_data.counters.bytes;
+		}
+		return 0;
+	}
+	return -1;
+error_exit:
+	return rte_flow_error_set
+			(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "netlink: failed to read flow rule statistics");
+notsup_exit:
+	return rte_flow_error_set
+			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "counters are not available.");
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_tcf_query(struct rte_eth_dev *dev,
+	       struct rte_flow *flow,
+	       const struct rte_flow_action *actions,
+	       void *data,
+	       struct rte_flow_error *error)
+{
+	int ret = -EINVAL;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_tcf_query_count(dev, flow, data, error);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return ret;
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
 	.validate = flow_tcf_validate,
 	.prepare = flow_tcf_prepare,
@@ -2098,6 +2602,7 @@ struct pedit_parser {
 	.apply = flow_tcf_apply,
 	.remove = flow_tcf_remove,
 	.destroy = flow_tcf_destroy,
+	.query = flow_tcf_query,
 };
 
 /**
@@ -2108,7 +2613,7 @@ struct pedit_parser {
  *   rte_errno is set.
  */
 static struct mnl_socket *
-mlx5_flow_mnl_socket_create(void)
+flow_tcf_mnl_socket_create(void)
 {
 	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
 
@@ -2131,7 +2636,7 @@ struct pedit_parser {
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
  */
 static void
-mlx5_flow_mnl_socket_destroy(struct mnl_socket *nl)
+flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
 {
 	if (nl)
 		mnl_socket_close(nl);
@@ -2208,7 +2713,7 @@ struct mlx5_flow_tcf_context *
 							sizeof(uint32_t));
 	if (!ctx)
 		goto error;
-	ctx->nl = mlx5_flow_mnl_socket_create();
+	ctx->nl = flow_tcf_mnl_socket_create();
 	if (!ctx->nl)
 		goto error;
 	ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
@@ -2234,7 +2739,7 @@ struct mlx5_flow_tcf_context *
 {
 	if (!ctx)
 		return;
-	mlx5_flow_mnl_socket_destroy(ctx->nl);
+	flow_tcf_mnl_socket_destroy(ctx->nl);
 	rte_free(ctx->buf);
 	rte_free(ctx);
 }
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 65c849c..d0cfcda 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1651,6 +1651,92 @@
 	return -rte_errno;
 }
 
+/**
+ * Query a flow counter.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_query_count(struct rte_eth_dev *dev __rte_unused,
+		       struct rte_flow *flow __rte_unused,
+		       void *data __rte_unused,
+		       struct rte_flow_error *error)
+{
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
+		struct rte_flow_query_count *qc = data;
+		uint64_t counters[2] = {0, 0};
+		struct ibv_query_counter_set_attr query_cs_attr = {
+			.cs = flow->counter->cs,
+			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
+		};
+		struct ibv_counter_set_data query_out = {
+			.out = counters,
+			.outlen = 2 * sizeof(uint64_t),
+		};
+		int err = mlx5_glue->query_counter_set(&query_cs_attr,
+						       &query_out);
+
+		if (err)
+			return rte_flow_error_set
+				(error, err,
+				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				 NULL,
+				 "cannot read counter");
+		qc->hits_set = 1;
+		qc->bytes_set = 1;
+		qc->hits = counters[0] - flow->counter->hits;
+		qc->bytes = counters[1] - flow->counter->bytes;
+		if (qc->reset) {
+			flow->counter->hits = counters[0];
+			flow->counter->bytes = counters[1];
+		}
+		return 0;
+	}
+	return rte_flow_error_set(error, EINVAL,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "flow does not have counter");
+#endif
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "counters are not available");
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_query(struct rte_eth_dev *dev,
+		 struct rte_flow *flow,
+		 const struct rte_flow_action *actions,
+		 void *data,
+		 struct rte_flow_error *error)
+{
+	int ret = -EINVAL;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_verbs_query_count(dev, flow, data, error);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return ret;
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
 	.validate = flow_verbs_validate,
 	.prepare = flow_verbs_prepare,
@@ -1658,4 +1744,5 @@
 	.apply = flow_verbs_apply,
 	.remove = flow_verbs_remove,
 	.destroy = flow_verbs_destroy,
+	.query = flow_verbs_query,
 };
-- 
1.8.3.1



More information about the dev mailing list