[dpdk-dev] [PATCH] net/mlx5: support e-switch flow count action

Moti Haimovsky motih at mellanox.com
Thu Sep 13 12:29:24 CEST 2018


This commit adds support for configuring flows destined to the mlx5
eswitch with 'count' action and for querying these counts at runtime.

It is possible to offload an interface's flow rules to the hardware
using DPDK flow commands.
With mlx5 it is also possible to offload a limited set of flow rules to
the mlxsw (or e-switch) using the same DPDK flow commands using the
'transfer' attribute in the flow rule creation command.
The commands destined for the switch are transposed to TC flower rules
and are sent, as Netlink messages, to the mlx5 driver (or more precisely
to the netdev which represent the mlxsw port).
Each flow rule configured by the mlx5 driver is also assigned with a set
of flow counters implicitly. These counters can be retrieved when querying
the flow rule via Netlink, they can be found in each flow action section
of the reply.
Currently the limited set of eswitch flow rules does not contain the
'count' action but since every rule contains a count we can still retrieve
these values as if we configured a 'count' action.

Supporting the 'count' action in the flow configuration command is
straight-forward. When transposing the command to a tc flower Netlink
message we just ignore it instead of rejecting it.
So the following two commands will have the same effect and behavior:
  testpmd> flow create 0 transfer ingress pattern eth src is
           11:22:33:44:55:77 dst is 11:22:33:44:55:88 / end
           actions drop / end
  testpmd> flow create 0 transfer ingress pattern eth src is
           11:22:33:44:55:77 dst is 11:22:33:44:55:88 / end
           actions count / drop / end
On the flow query side, the command now also returns the counts of the
above flow by using the tc Netlink query command.
Special care was taken in order to prevent Netlink message truncation
due to short buffers by using MNL_SOCKET_BUFFER_SIZE buffers which are
pre-allocated per port instance.

Signed-off-by: Moti Haimovsky <motih at mellanox.com>
---
Note:
 This patch should be applied on top of the new flow engine commits
 by orika and yskoh
---
 drivers/net/mlx5/mlx5.c            |   9 +-
 drivers/net/mlx5/mlx5.h            |   3 +
 drivers/net/mlx5/mlx5_flow.c       |  29 +++-
 drivers/net/mlx5/mlx5_flow.h       |  16 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   1 +
 drivers/net/mlx5/mlx5_flow_tcf.c   | 339 +++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_verbs.c |   1 +
 7 files changed, 392 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index e3c3671..b03ca7d 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -289,6 +289,8 @@
 		close(priv->nl_socket_rdma);
 	if (priv->mnl_socket)
 		mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+	if (priv->mnl_rcvbuf)
+		mlx5_flow_tcf_rcv_buf_destroy(priv->mnl_rcvbuf);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1139,7 +1141,8 @@
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
 	priv->mnl_socket = mlx5_flow_tcf_socket_create();
-	if (!priv->mnl_socket) {
+	priv->mnl_rcvbuf = mlx5_flow_tcf_rcv_buf_create();
+	if (!priv->mnl_socket || !priv->mnl_rcvbuf) {
 		err = -rte_errno;
 		DRV_LOG(WARNING,
 			"flow rules relying on switch offloads will not be"
@@ -1163,7 +1166,9 @@
 				" not be supported: %s: %s",
 				error.message, strerror(rte_errno));
 			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+			mlx5_flow_tcf_rcv_buf_destroy(priv->mnl_rcvbuf);
 			priv->mnl_socket = NULL;
+			priv->mnl_rcvbuf = NULL;
 		}
 	}
 	TAILQ_INIT(&priv->flows);
@@ -1220,6 +1225,8 @@
 			close(priv->nl_socket_rdma);
 		if (priv->mnl_socket)
 			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+		if (priv->mnl_rcvbuf)
+			mlx5_flow_tcf_rcv_buf_destroy(priv->mnl_rcvbuf);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 46942e2..4e9cf1d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -158,6 +158,7 @@ struct mlx5_drop {
 };
 
 struct mnl_socket;
+struct mlx5_flow_tcf_rbuf;
 
 struct priv {
 	LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
@@ -225,6 +226,8 @@ struct priv {
 	/* UAR same-page access control required in 32bit implementations. */
 #endif
 	struct mnl_socket *mnl_socket; /* Libmnl socket. */
+	struct mlx5_flow_tcf_rbuf *mnl_rcvbuf;
+	/* Buffer for receiving libmnl messages. */
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f9a64d3..178295c 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1599,6 +1599,17 @@ int mlx5_flow_validate_item_mpls(uint64_t item_flags __rte_unused,
 	return -rte_errno;
 }
 
+int
+flow_null_query(struct rte_eth_dev *dev __rte_unused,
+		struct rte_flow *flow __rte_unused,
+		enum rte_flow_action_type type __rte_unused,
+		void *data __rte_unused,
+		struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
 /* Void driver to protect from null pointer reference. */
 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
 	.validate = flow_null_validate,
@@ -1606,6 +1617,7 @@ int mlx5_flow_validate_item_mpls(uint64_t item_flags __rte_unused,
 	.translate = flow_null_translate,
 	.apply = flow_null_apply,
 	.remove = flow_null_remove,
+	.query = flow_null_query,
 };
 
 /**
@@ -2262,10 +2274,19 @@ struct rte_flow *
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
-		      void *data __rte_unused,
+mlx5_flow_query_count(struct rte_eth_dev *dev,
+		      struct rte_flow *flow,
+		      void *data,
 		      struct rte_flow_error *error)
 {
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type ftype = flow->drv_type;
+
+	assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(ftype);
+	if (ftype == MLX5_FLOW_TYPE_TCF)
+		return fops->query(dev, flow,
+				   RTE_FLOW_ACTION_TYPE_COUNT, data, error);
 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
 	if (flow->actions & MLX5_ACTION_COUNT) {
 		struct rte_flow_query_count *qc = data;
@@ -2315,7 +2336,7 @@ struct rte_flow *
  * @see rte_flow_ops
  */
 int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
 		struct rte_flow *flow,
 		const struct rte_flow_action *actions,
 		void *data,
@@ -2328,7 +2349,7 @@ struct rte_flow *
 		case RTE_FLOW_ACTION_TYPE_VOID:
 			break;
 		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_query_count(flow, data, error);
+			ret = mlx5_flow_query_count(dev, flow, data, error);
 			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 0c0a60f..4d304a9 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -127,6 +127,8 @@ enum mlx5_flow_drv_type {
 struct mlx5_flow_tcf {
 	struct nlmsghdr *nlh;
 	struct tcmsg *tcm;
+	uint64_t hits;
+	uint64_t bytes;
 };
 
 /** Handles information leading to a drop fate. */
@@ -204,7 +206,6 @@ struct rte_flow {
 	struct rte_flow_action_rss rss;/**< RSS context. */
 	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
 	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
-	void *nl_flow; /**< Netlink flow buffer if relevant. */
 	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
 	uint32_t actions;
 };
@@ -229,15 +230,26 @@ typedef int (*mlx5_flow_apply_t)(struct rte_eth_dev *dev, struct rte_flow *flow,
 typedef int (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
 				  struct rte_flow *flow,
 				  struct rte_flow_error *error);
+typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
+				 struct rte_flow *flow,
+				 enum rte_flow_action_type type,
+				 void *data,
+				 struct rte_flow_error *error);
 struct mlx5_flow_driver_ops {
 	mlx5_flow_validate_t validate;
 	mlx5_flow_prepare_t prepare;
 	mlx5_flow_translate_t translate;
 	mlx5_flow_apply_t apply;
 	mlx5_flow_remove_t remove;
+	mlx5_flow_query_t query;
 };
 
 /* mlx5_flow.c */
+int flow_null_query(struct rte_eth_dev *dev,
+		    struct rte_flow *flow,
+		    enum rte_flow_action_type type,
+		    void *data,
+		    struct rte_flow_error *error);
 int mlx5_flow_validate_action_flag(uint64_t action_flags,
 				   struct rte_flow_error *error);
 int mlx5_flow_validate_action_mark(uint64_t action_flags,
@@ -302,5 +314,7 @@ int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
 		       struct rte_flow_error *error);
 struct mnl_socket *mlx5_flow_tcf_socket_create(void);
 void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
+struct mlx5_flow_tcf_rbuf *mlx5_flow_tcf_rcv_buf_create(void);
+void mlx5_flow_tcf_rcv_buf_destroy(struct mlx5_flow_tcf_rbuf *rb);
 
 #endif
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 75c1050..f5c0e32 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1429,6 +1429,7 @@ static int flow_dv_validate_attributes(struct rte_eth_dev *dev,
 	.translate = flow_dv_translate,
 	.apply = flow_dv_apply,
 	.remove = flow_dv_remove,
+	.query = flow_null_query,
 };
 
 #endif
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 769a13a..8ab0171 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
 #include <linux/pkt_cls.h>
@@ -153,6 +154,16 @@ struct tc_vlan {
 #define IPV6_ADDR_LEN 16
 #endif
 
+/**
+ * Structure for holding netlink message buffer of MNL_SOCKET_BUFFER_SIZE.
+ * Using this (8KB) buffer size ensures that netlink messages will never be
+ * truncated.
+ */
+struct mlx5_flow_tcf_rbuf {
+	uint8_t *buf;
+	uint16_t bsize;
+};
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -704,6 +715,9 @@ struct flow_tcf_ptoi {
 					 "can't have multiple fate actions");
 			action_flags |= MLX5_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			action_flags |= MLX5_ACTION_COUNT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			action_flags |= MLX5_ACTION_OF_POP_VLAN;
 			break;
@@ -844,6 +858,9 @@ struct flow_tcf_ptoi {
 				SZ_NLATTR_TYPE_OF(struct tc_gact);
 			flags |= MLX5_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			flags |= MLX5_ACTION_COUNT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			flags |= MLX5_ACTION_OF_POP_VLAN;
 			goto action_of_vlan;
@@ -1330,6 +1347,12 @@ struct flow_tcf_ptoi {
 			mnl_attr_nest_end(nlh, na_act);
 			mnl_attr_nest_end(nlh, na_act_index);
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			/*
+			 * Driver adds the count action implicitly for
+			 * each rule it creates.
+			 */
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			conf.of_push_vlan = NULL;
 			vlan_act = TCA_VLAN_ACT_POP;
@@ -1511,12 +1534,328 @@ struct flow_tcf_ptoi {
 	return 0;
 }
 
+/**
+ * Parse rtnetlink message attributes filling the attribute table with the info
+ * being retrieved.
+ *
+ * @param tb
+ *   Attribute table to be filled.
+ * @param max
+ *   Maximum entry in the attribute table.
+ * @param rta
+ *   The attributes section in the message to be parsed.
+ * @param len
+ *   The length of the attributes section in the message.
+ *
+ * Note: only the first attribute seen for each type is recorded.
+ */
+static void
+tc_parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
+{
+	unsigned short type;
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+	while (RTA_OK(rta, len)) {
+		type = rta->rta_type;
+		if (type <= max && !tb[type])
+			tb[type] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+}
+
+ /**
+  * Extract action counters from flower action.
+  *
+  * @param rta
+  *   flower action stats properties in the Netlink message received.
+  * @param[out] qc
+  *   Count statistics retrieved from the message query.
+  * @return
+  *   0 on successful extraction of action counts, -1 otherwise.
+  */
+static int
+tc_flow_extract_stats_attr(struct rtattr *rta, struct rte_flow_query_count *qc)
+{
+	struct rtattr *tbs[TCA_STATS_MAX + 1];
+
+	tc_parse_rtattr(tbs, TCA_STATS_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta));
+	if (tbs[TCA_STATS_BASIC]) {
+		struct gnet_stats_basic bs = {0};
+
+		memcpy(&bs, RTA_DATA(tbs[TCA_STATS_BASIC]),
+		       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+		       sizeof(bs)));
+		qc->bytes = bs.bytes;
+		qc->hits = bs.packets;
+		qc->bytes_set = 1;
+		qc->hits_set = 1;
+		return 0;
+	}
+	return -1;
+}
+
+ /**
+  * Parse flower single action retrieving the flow counters from it if present.
+  *
+  * @param arg
+  *   flower action properties in the Netlink message received.
+  * @param[out] qc
+  *   Count statistics retrieved from the message query.
+  * @return
+  *   0 on successful retrieval of action counts, -1 otherwise.
+  */
+static int
+tc_flow_parse_one_action(struct rtattr *arg, struct rte_flow_query_count *qc)
+{
+	struct rtattr *tb[TCA_ACT_MAX + 1];
+
+	if (arg == NULL)
+		return -1;
+	tc_parse_rtattr(tb, TCA_ACT_MAX, RTA_DATA(arg), RTA_PAYLOAD(arg));
+	if (tb[TCA_ACT_KIND] == NULL)
+		return -1;
+	if (tb[TCA_ACT_STATS])
+		return tc_flow_extract_stats_attr(tb[TCA_ACT_STATS], qc);
+	return -1;
+}
+
+ /**
+  * Parse flower action section in the message, retrieving the flow counters
+  * from the first action that contains them.
+  * flow counters are stored in the actions defined by the flow and not in the
+  * flow itself, therefore we need to traverse the flower action in search for
+  * them.
+  *
+  * @param arg
+  *   Flower action section in the Netlink message received.
+  * @param[out] qc
+  *   Count statistics retrieved from the message query.
+  */
+static void
+tc_flow_parse_action(const struct rtattr *arg, struct rte_flow_query_count *qc)
+{
+	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+	int i;
+
+	if (arg == NULL)
+		return;
+	tc_parse_rtattr(tb, TCA_ACT_MAX_PRIO, RTA_DATA(arg), RTA_PAYLOAD(arg));
+	for (i = 0; i <= TCA_ACT_MAX_PRIO; i++)
+		if (tb[i])
+			if (tc_flow_parse_one_action(tb[i], qc) == 0)
+				break;
+}
+
+ /**
+  * Parse Netlink reply on flower type of filters, retrieving the flow counters
+  * from it.
+  *
+  * @param opt
+  *   flower section in the Netlink message received.
+  * @param[out] qc
+  *   Count statistics retrieved from the message query.
+  */
+static void
+tc_flower_parse_opt(struct rtattr *opt,
+		    struct rte_flow_query_count *qc)
+{
+	struct rtattr *tb[TCA_FLOWER_MAX + 1];
+
+	if (!opt)
+		return;
+	tc_parse_rtattr(tb, TCA_FLOWER_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+	if (tb[TCA_FLOWER_ACT])
+		tc_flow_parse_action(tb[TCA_FLOWER_ACT], qc);
+}
+
+ /**
+  * Parse Netlink reply on filter query, retrieving the flow counters.
+  *
+  * @param nlh
+  *   Message received from Netlink.
+  * @param[out] qc
+  *   Count statistics retrieved from the message query.
+  *
+  * @return
+  *   MNL_CB_ERROR on error, MNL_CB_OK value otherwise.
+  */
+static int
+mlx5_nl_flow_parse_filter(const struct nlmsghdr *nlh,
+			  struct rte_flow_query_count *qc)
+{
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len = nlh->nlmsg_len;
+	struct rtattr *tb[TCA_MAX + 1] = { };
+
+	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+	    nlh->nlmsg_type != RTM_GETTFILTER &&
+	    nlh->nlmsg_type != RTM_DELTFILTER)
+		return MNL_CB_OK;
+	len -= NLMSG_LENGTH(sizeof(*t));
+	if (len < 0)
+		return MNL_CB_ERROR;
+	tc_parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+	if (tb[TCA_KIND])
+		if (strcmp(RTA_DATA(tb[TCA_KIND]), "flower") == 0)
+			tc_flower_parse_opt(tb[TCA_OPTIONS], qc);
+	return MNL_CB_OK;
+}
+
+/**
+ * A callback to parse Netlink reply on filter query attempting to retrieve the
+ * flow counters if present.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] data
+ *   pointer to the count statistics to be filled by the routine.
+ *
+ * @return
+ *   MNL_CB_ERROR on error, MNL_CB_OK value otherwise.
+ */
+static int
+mlx5_nl_flow_parse_message(const struct nlmsghdr *nlh, void *data)
+{
+	struct rte_flow_query_count *qc = (struct rte_flow_query_count *)data;
+
+	switch (nlh->nlmsg_type) {
+	case NLMSG_NOOP:
+		return MNL_CB_OK;
+	case NLMSG_ERROR:
+	case NLMSG_OVERRUN:
+		return MNL_CB_ERROR;
+	default:
+		break;
+	}
+	return mlx5_nl_flow_parse_filter(nlh, qc);
+}
+
+ /**
+  * Query a tcf rule for its statistics via netlink.
+  *
+  * @param[in] dev
+  *   Pointer to Ethernet device.
+  * @param[in] flow
+  *   Pointer to the sub flow.
+  * @param[out] data
+  *   Data retrieved by the query; only RTE_FLOW_ACTION_TYPE_COUNT supported.
+  * @param[out] error
+  *   Perform verbose error reporting if not NULL.
+  *
+  * @return
+  *   0 on success, a negative errno value otherwise and rte_errno is set.
+  */
+static int
+mlx5_flow_tcf_query(struct rte_eth_dev *dev,
+			  struct rte_flow *flow,
+			  enum rte_flow_action_type type,
+			  void *data,
+			  struct rte_flow_error *error)
+{
+	struct rte_flow_query_count *qc = data;
+	struct priv *priv = dev->data->dev_private;
+	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow_tcf_rbuf *rbuf = priv->mnl_rcvbuf;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+	uint32_t seq = random();
+	ssize_t ret;
+
+	assert(qc);
+	assert(rbuf);
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	/* Currently only query count is supported. */
+	if (type != RTE_FLOW_ACTION_TYPE_COUNT)
+		goto error_nosup;
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_GETTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+	nlh->nlmsg_seq = seq;
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+		goto error_exit;
+	ret = mnl_socket_recvfrom(nl, rbuf->buf, rbuf->bsize);
+	if (ret == -1)
+		goto error_exit;
+	while (ret > 0) {
+		ret = mnl_cb_run(rbuf->buf, ret, seq,
+				 mnl_socket_get_portid(nl),
+				 mlx5_nl_flow_parse_message, qc);
+		if (ret <= MNL_CB_STOP)
+			break;
+		ret = mnl_socket_recvfrom(nl, rbuf->buf, rbuf->bsize);
+	}
+	/* Return the delta from last reset. */
+	qc->hits -= dev_flow->tcf.hits;
+	qc->bytes -= dev_flow->tcf.bytes;
+	if (qc->reset) {
+		dev_flow->tcf.hits = qc->hits;
+		dev_flow->tcf.bytes = qc->bytes;
+	}
+	return 0;
+error_nosup:
+	return rte_flow_error_set
+			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			 NULL, "tcf: unsupported query");
+error_exit:
+	return rte_flow_error_set
+			(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "netlink: failed to read flow rule statistics");
+}
+
+/**
+ * Create netlink receive buffer.
+ * Netlink queries may result in a large netlink reply; in case of a short
+ * receive buffer a reply message may be truncated. To avoid this,
+ * we allocate a buffer of MNL_SOCKET_BUFFER_SIZE (which is system dependent
+ * and usually 8KB long). Using this buffer size ensures that netlink
+ * messages will be stored without truncating.
+ *
+ * @return
+ *   Pointer to the mlx5_flow_tcf_rbuf created, NULL value otherwise.
+ */
+struct mlx5_flow_tcf_rbuf *
+mlx5_flow_tcf_rcv_buf_create(void)
+{
+	struct mlx5_flow_tcf_rbuf *rbuf =
+		rte_zmalloc(__func__,
+			    sizeof(struct mlx5_flow_tcf_rbuf),
+			    sizeof(uint32_t));
+	uint8_t *buf = rte_zmalloc(__func__,
+				   MNL_SOCKET_BUFFER_SIZE,
+				   sizeof(uint32_t));
+	if (!buf || !rbuf) {
+		rte_free(buf);
+		rte_free(rbuf);
+		return NULL;
+	}
+	rbuf->buf = buf;
+	rbuf->bsize = MNL_SOCKET_BUFFER_SIZE;
+	return rbuf;
+}
+
+ /**
+  * Destroy mlx5_flow_tcf_rbuf.
+  *
+  * @param rb
+  *   The receive buffer to destroy.
+  */
+void
+mlx5_flow_tcf_rcv_buf_destroy(struct mlx5_flow_tcf_rbuf *rb)
+{
+	if (rb) {
+		rte_free(rb->buf);
+		rte_free(rb);
+	}
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
 	.validate = flow_tcf_validate,
 	.prepare = flow_tcf_prepare,
 	.translate = flow_tcf_translate,
 	.apply = flow_tcf_apply,
 	.remove = flow_tcf_remove,
+	.query = mlx5_flow_tcf_query,
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index fc021a8..1708376 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1775,4 +1775,5 @@ struct ibv_spec_header {
 	.translate = flow_verbs_translate,
 	.apply = flow_verbs_apply,
 	.remove = flow_verbs_remove,
+	.query = flow_null_query,
 };
-- 
1.8.3.1



More information about the dev mailing list