[dpdk-dev] [PATCH v2 2/2] net/mlx5: support e-switch flow count action

Mordechay Haimovsky motih at mellanox.com
Thu Oct 11 15:04:35 CEST 2018


This commit adds support for configuring flows destined to the mlx5
e-switch with the 'count' action and for querying these counters at
runtime.

It is possible to offload an interface's flow rules to the hardware
using DPDK flow commands.
With mlx5 it is also possible to offload a limited set of flow rules to
the mlxsw (or e-switch) using the same DPDK flow commands, by setting
the 'transfer' attribute in the flow rule creation command.
The commands destined for the switch are transposed to TC flower rules
and sent, as Netlink messages, to the mlx5 driver (or more precisely to
the netdev which represents the mlxsw port).
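A drop rule like the one created further below, for instance, is roughly
equivalent to a TC command of this shape (the representor netdev name is
illustrative, and the PMD emits the Netlink messages directly rather
than going through the tc CLI):
  tc filter add dev <representor> parent ffff: protocol all flower \
     src_mac 11:22:33:44:55:77 dst_mac 11:22:33:44:55:88 action drop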
Each flow rule configured by the mlx5 driver is also implicitly
assigned a set of flow counters. These counters can be retrieved when
querying the flow rule via Netlink; they can be found in each flow
action section of the reply.
Currently the limited set of e-switch flow rules does not contain the
'count' action, but since every rule carries counters we can still
retrieve these values as if a 'count' action had been configured.
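
As a rough application-side sketch (not part of this patch; port_id and
flow stand for the application's own port and flow rule handles, with
rte_flow.h assumed), such a query boils down to:
  struct rte_flow_query_count qc = { .reset = 1 };
  struct rte_flow_action query[] = {
          { .type = RTE_FLOW_ACTION_TYPE_COUNT },
          { .type = RTE_FLOW_ACTION_TYPE_END },
  };
  struct rte_flow_error err;
  int ret = rte_flow_query(port_id, flow, query, &qc, &err);

  /* On success, qc.hits and qc.bytes hold the rule counters. */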

Supporting the 'count' action in the flow configuration command is
straightforward. When transposing the command to a tc flower Netlink
message we simply ignore it instead of rejecting it.
So the following two commands will have the same effect and behavior:
  testpmd> flow create 0 transfer ingress pattern eth src is
           11:22:33:44:55:77 dst is 11:22:33:44:55:88 / end
           actions drop / end
  testpmd> flow create 0 transfer ingress pattern eth src is
           11:22:33:44:55:77 dst is 11:22:33:44:55:88 / end
           actions count / drop / end
On the flow query side, the flow query command now also returns the
counters of the above flows by issuing a tc Netlink query command.
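For example, assuming the first rule above was created as rule 0 on
port 0, its counters can then be read back with:
  testpmd> flow query 0 0 count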
Special care was taken in order to prevent Netlink message truncation
due to short buffers, by using MNL_SOCKET_BUFFER_SIZE buffers which are
pre-allocated per port instance.
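
A minimal sketch of that pre-allocation, assuming it is done when the
tcf context of the port is created (the exact place in the driver may
differ):
  ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
  ctx->buf = rte_zmalloc(__func__, ctx->buf_size, 0);
  if (!ctx->buf) {
          rte_errno = ENOMEM;
          goto error;
  }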

Signed-off-by: Moti Haimovsky <motih at mellanox.com>
---
v2:
 * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow infrastructure")
---
 drivers/net/mlx5/mlx5_flow.c       |  29 +++-
 drivers/net/mlx5/mlx5_flow.h       |  14 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   1 +
 drivers/net/mlx5/mlx5_flow_tcf.c   | 286 ++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_flow_verbs.c |   1 +
 5 files changed, 325 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 6b2698a..7c6ece1 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1649,6 +1649,17 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 {
 }
 
+int
+flow_null_query(struct rte_eth_dev *dev __rte_unused,
+		struct rte_flow *flow __rte_unused,
+		enum rte_flow_action_type type __rte_unused,
+		void *data __rte_unused,
+		struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
 /* Void driver to protect from null pointer reference. */
 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
 	.validate = flow_null_validate,
@@ -1657,6 +1668,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 	.apply = flow_null_apply,
 	.remove = flow_null_remove,
 	.destroy = flow_null_destroy,
+	.query = flow_null_query,
 };
 
 /**
@@ -2349,10 +2361,19 @@ struct rte_flow *
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
-		      void *data __rte_unused,
+mlx5_flow_query_count(struct rte_eth_dev *dev,
+		      struct rte_flow *flow,
+		      void *data,
 		      struct rte_flow_error *error)
 {
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type ftype = flow->drv_type;
+
+	assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(ftype);
+	if (ftype == MLX5_FLOW_TYPE_TCF)
+		return fops->query(dev, flow,
+				   RTE_FLOW_ACTION_TYPE_COUNT, data, error);
 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
 	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
 		struct rte_flow_query_count *qc = data;
@@ -2402,7 +2423,7 @@ struct rte_flow *
  * @see rte_flow_ops
  */
 int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
 		struct rte_flow *flow,
 		const struct rte_flow_action *actions,
 		void *data,
@@ -2415,7 +2436,7 @@ struct rte_flow *
 		case RTE_FLOW_ACTION_TYPE_VOID:
 			break;
 		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_query_count(flow, data, error);
+			ret = mlx5_flow_query_count(dev, flow, data, error);
 			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 41d55e8..3e1e9a0 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -175,6 +175,8 @@ struct mlx5_flow_dv {
 struct mlx5_flow_tcf {
 	struct nlmsghdr *nlh;
 	struct tcmsg *tcm;
+	uint64_t hits;
+	uint64_t bytes;
 };
 
 /* Verbs specification header. */
@@ -232,7 +234,6 @@ struct rte_flow {
 	struct rte_flow_action_rss rss;/**< RSS context. */
 	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
 	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
-	void *nl_flow; /**< Netlink flow buffer if relevant. */
 	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
 	/**< Device flows that are part of the flow. */
 	uint32_t actions; /**< Bit-fields which mark all detected actions. */
@@ -258,6 +259,11 @@ typedef void (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
 				   struct rte_flow *flow);
 typedef void (*mlx5_flow_destroy_t)(struct rte_eth_dev *dev,
 				    struct rte_flow *flow);
+typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
+				 struct rte_flow *flow,
+				 enum rte_flow_action_type type,
+				 void *data,
+				 struct rte_flow_error *error);
 struct mlx5_flow_driver_ops {
 	mlx5_flow_validate_t validate;
 	mlx5_flow_prepare_t prepare;
@@ -265,10 +271,16 @@ struct mlx5_flow_driver_ops {
 	mlx5_flow_apply_t apply;
 	mlx5_flow_remove_t remove;
 	mlx5_flow_destroy_t destroy;
+	mlx5_flow_query_t query;
 };
 
 /* mlx5_flow.c */
 
+int flow_null_query(struct rte_eth_dev *dev,
+		    struct rte_flow *flow,
+		    enum rte_flow_action_type type,
+		    void *data,
+		    struct rte_flow_error *error);
 uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int tunnel,
 				     uint32_t layer_types,
 				     uint64_t hash_fields);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c9aa50f..9c8d074 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1365,6 +1365,7 @@
 	.apply = flow_dv_apply,
 	.remove = flow_dv_remove,
 	.destroy = flow_dv_destroy,
+	.query = flow_null_query,
 };
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 8535a15..d12a566 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
 #include <linux/pkt_cls.h>
@@ -163,7 +164,8 @@ struct mlx5_flow_tcf_context {
 	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
 	uint32_t seq; /* Message sequence number. */
 	uint32_t buf_size; /* Message buffer size. */
-	uint8_t *buf; /* Message buffer. */
+	uint8_t *buf;
+	/* Message buffer (used for receiving large netlink messages). */
 };
 
 /** Empty masks for known item types. */
@@ -696,6 +698,9 @@ struct flow_tcf_ptoi {
 					 "can't have multiple fate actions");
 			action_flags |= MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			action_flags |= MLX5_FLOW_ACTION_COUNT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
 			break;
@@ -875,6 +880,9 @@ struct flow_tcf_ptoi {
 				SZ_NLATTR_TYPE_OF(struct tc_gact);
 			flags |= MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			flags |= MLX5_FLOW_ACTION_COUNT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
 			goto action_of_vlan;
@@ -1360,6 +1368,12 @@ struct flow_tcf_ptoi {
 			mnl_attr_nest_end(nlh, na_act);
 			mnl_attr_nest_end(nlh, na_act_index);
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			/*
+			 * Driver adds the count action implicitly for
+			 * each rule it creates.
+			 */
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			conf.of_push_vlan = NULL;
 			vlan_act = TCA_VLAN_ACT_POP;
@@ -1564,6 +1578,275 @@ struct flow_tcf_ptoi {
 	rte_free(dev_flow);
 }
 
+/**
+ * Parse rtnetlink message attributes, filling the attribute table with
+ * the info retrieved.
+ *
+ * @param[out] tb
+ *   Attribute table to be filled.
+ * @param max
+ *   Maximum entry in the attribute table.
+ * @param rta
+ *   The attributes section in the message to be parsed.
+ * @param len
+ *   The length of the attributes section in the message.
+ */
+static void
+tc_parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
+{
+	unsigned short type;
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+	while (RTA_OK(rta, len)) {
+		type = rta->rta_type;
+		if (type <= max && !tb[type])
+			tb[type] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+}
+
+/**
+ * Extract action counters from a flower action.
+ *
+ * @param rta
+ *   Flower action stats properties in the Netlink message received.
+ * @param[out] qc
+ *   Count statistics retrieved from the message query.
+ * @return
+ *   0 on successful extraction of action counts, -1 otherwise.
+ */
+static int
+tc_flow_extract_stats_attr(struct rtattr *rta, struct rte_flow_query_count *qc)
+{
+	struct rtattr *tbs[TCA_STATS_MAX + 1];
+
+	tc_parse_rtattr(tbs, TCA_STATS_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta));
+	if (tbs[TCA_STATS_BASIC]) {
+		struct gnet_stats_basic bs = {0};
+
+		memcpy(&bs, RTA_DATA(tbs[TCA_STATS_BASIC]),
+		       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+		       sizeof(bs)));
+		qc->bytes = bs.bytes;
+		qc->hits = bs.packets;
+		qc->bytes_set = 1;
+		qc->hits_set = 1;
+		return 0;
+	}
+	return -1;
+}
+
+/**
+ * Parse a single flower action, retrieving the flow counters from it if
+ * present.
+ *
+ * @param arg
+ *   Flower action properties in the Netlink message received.
+ * @param[out] qc
+ *   Count statistics retrieved from the message query.
+ * @return
+ *   0 on successful retrieval of action counts, -1 otherwise.
+ */
+static int
+tc_flow_parse_one_action(struct rtattr *arg, struct rte_flow_query_count *qc)
+{
+	struct rtattr *tb[TCA_ACT_MAX + 1];
+
+	if (arg == NULL)
+		return -1;
+	tc_parse_rtattr(tb, TCA_ACT_MAX, RTA_DATA(arg), RTA_PAYLOAD(arg));
+	if (tb[TCA_ACT_KIND] == NULL)
+		return -1;
+	if (tb[TCA_ACT_STATS])
+		return tc_flow_extract_stats_attr(tb[TCA_ACT_STATS], qc);
+	return -1;
+}
+
+/**
+ * Parse the flower action section in the message, retrieving the flow
+ * counters from the first action that contains them.
+ * Flow counters are stored in the actions defined by the flow and not
+ * in the flow itself, therefore we need to traverse the flower actions
+ * in search of them.
+ *
+ * @param arg
+ *   Flower action section in the Netlink message received.
+ * @param[out] qc
+ *   Count statistics retrieved from the message query.
+ */
+static void
+tc_flow_parse_action(const struct rtattr *arg, struct rte_flow_query_count *qc)
+{
+	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+	int i;
+
+	if (arg == NULL)
+		return;
+	tc_parse_rtattr(tb, TCA_ACT_MAX_PRIO, RTA_DATA(arg), RTA_PAYLOAD(arg));
+	for (i = 0; i <= TCA_ACT_MAX_PRIO; i++)
+		if (tb[i])
+			if (tc_flow_parse_one_action(tb[i], qc) == 0)
+				break;
+}
+
+/**
+ * Parse the Netlink reply on flower-type filters, retrieving the flow
+ * counters from it.
+ *
+ * @param opt
+ *   Flower options section in the Netlink message received.
+ * @param[out] qc
+ *   Count statistics retrieved from the message query.
+ */
+static void
+tc_flower_parse_opt(struct rtattr *opt,
+		    struct rte_flow_query_count *qc)
+{
+	struct rtattr *tb[TCA_FLOWER_MAX + 1];
+
+	if (!opt)
+		return;
+	tc_parse_rtattr(tb, TCA_FLOWER_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+	if (tb[TCA_FLOWER_ACT])
+		tc_flow_parse_action(tb[TCA_FLOWER_ACT], qc);
+}
+
+/**
+ * Parse Netlink reply on filter query, retrieving the flow counters.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] qc
+ *   Count statistics retrieved from the message query.
+ *
+ * @return
+ *   MNL_CB_ERROR on error, MNL_CB_OK value otherwise.
+ */
+static int
+mlx5_nl_flow_parse_filter(const struct nlmsghdr *nlh,
+			  struct rte_flow_query_count *qc)
+{
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len = nlh->nlmsg_len;
+	struct rtattr *tb[TCA_MAX + 1] = { };
+
+	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+	    nlh->nlmsg_type != RTM_GETTFILTER &&
+	    nlh->nlmsg_type != RTM_DELTFILTER)
+		return MNL_CB_OK;
+	len -= NLMSG_LENGTH(sizeof(*t));
+	if (len < 0)
+		return MNL_CB_ERROR;
+	tc_parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+	if (tb[TCA_KIND])
+		if (strcmp(RTA_DATA(tb[TCA_KIND]), "flower") == 0)
+			tc_flower_parse_opt(tb[TCA_OPTIONS], qc);
+	return MNL_CB_OK;
+}
+
+/**
+ * A callback to parse Netlink reply on filter query attempting to retrieve the
+ * flow counters if present.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] data
+ *   Pointer to the count statistics to be filled by the routine.
+ *
+ * @return
+ *   MNL_CB_ERROR on error, MNL_CB_OK value otherwise.
+ */
+static int
+mlx5_nl_flow_parse_message(const struct nlmsghdr *nlh, void *data)
+{
+	struct rte_flow_query_count *qc = (struct rte_flow_query_count *)data;
+
+	switch (nlh->nlmsg_type) {
+	case NLMSG_NOOP:
+		return MNL_CB_OK;
+	case NLMSG_ERROR:
+	case NLMSG_OVERRUN:
+		return MNL_CB_ERROR;
+	default:
+		break;
+	}
+	return mlx5_nl_flow_parse_filter(nlh, qc);
+}
+
+/**
+ * Query a tcf rule for its statistics via Netlink.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the flow.
+ * @param[in] type
+ *   Flow action type to query, only RTE_FLOW_ACTION_TYPE_COUNT is
+ *   supported.
+ * @param[out] data
+ *   Data retrieved by the query.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_tcf_query(struct rte_eth_dev *dev,
+			  struct rte_flow *flow,
+			  enum rte_flow_action_type type,
+			  void *data,
+			  struct rte_flow_error *error)
+{
+	struct rte_flow_query_count *qc = data;
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+	struct mnl_socket *nl = ctx->nl;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+	uint32_t seq = priv->tcf_context->seq++;
+	ssize_t ret;
+	assert(qc);
+
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	/* Currently only query count is supported. */
+	if (type != RTE_FLOW_ACTION_TYPE_COUNT)
+		goto error_nosup;
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_GETTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+	nlh->nlmsg_seq = seq;
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+		goto error_exit;
+	ret = mnl_socket_recvfrom(nl, priv->tcf_context->buf,
+				  priv->tcf_context->buf_size);
+	if (ret == -1)
+		goto error_exit;
+	while (ret > 0) {
+		ret = mnl_cb_run(ctx->buf, ret, seq,
+				 mnl_socket_get_portid(nl),
+				 mlx5_nl_flow_parse_message, qc);
+		if (ret <= MNL_CB_STOP)
+			break;
+		ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
+	}
+	/* Return the delta from last reset. */
+	qc->hits -= dev_flow->tcf.hits;
+	qc->bytes -= dev_flow->tcf.bytes;
+	if (qc->reset) {
+		dev_flow->tcf.hits = qc->hits;
+		dev_flow->tcf.bytes = qc->bytes;
+	}
+	return 0;
+error_nosup:
+	return rte_flow_error_set
+			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			 NULL, "tcf: unsupported query");
+error_exit:
+	return rte_flow_error_set
+			(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "netlink: failed to read flow rule statistics");
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
 	.validate = flow_tcf_validate,
 	.prepare = flow_tcf_prepare,
@@ -1571,6 +1854,7 @@ struct flow_tcf_ptoi {
 	.apply = flow_tcf_apply,
 	.remove = flow_tcf_remove,
 	.destroy = flow_tcf_destroy,
+	.query = mlx5_flow_tcf_query,
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index ad8f7ac..e377b3b 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1655,4 +1655,5 @@
 	.apply = flow_verbs_apply,
 	.remove = flow_verbs_remove,
 	.destroy = flow_verbs_destroy,
+	.query = flow_null_query,
 };
-- 
1.8.3.1
