[dpdk-dev] [PATCH 2/5] net/mlx5: e-switch VXLAN netlink routines update

Slava Ovsiienko viacheslavo at mellanox.com
Tue Oct 2 08:30:36 CEST 2018


This part of the patchset updates the Netlink exchange routines. Message
sequence numbers are no longer random, multipart reply messages are now
supported, errors are no longer propagated to subsequent socket calls, and
the Netlink reply buffer size is increased to MNL_SOCKET_BUFFER_SIZE.

Suggested-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
---
 drivers/net/mlx5/mlx5.c          |  18 ++--
 drivers/net/mlx5/mlx5.h          |   7 +-
 drivers/net/mlx5/mlx5_flow.h     |   9 +-
 drivers/net/mlx5/mlx5_flow_tcf.c | 214 +++++++++++++++++++++++----------------
 4 files changed, 147 insertions(+), 101 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 4be6a1c..201a26e 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -287,8 +287,7 @@
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
-	if (priv->mnl_socket)
-		mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+	mlx5_flow_tcf_socket_close(&priv->tcf_socket);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1138,8 +1137,9 @@
 	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
-	priv->mnl_socket = mlx5_flow_tcf_socket_create();
-	if (!priv->mnl_socket) {
+	/* Initialize Netlink socket for e-switch control. */
+	err = mlx5_flow_tcf_socket_open(&priv->tcf_socket);
+	if (err) {
 		err = -rte_errno;
 		DRV_LOG(WARNING,
 			"flow rules relying on switch offloads will not be"
@@ -1154,16 +1154,15 @@
 			error.message =
 				"cannot retrieve network interface index";
 		} else {
-			err = mlx5_flow_tcf_init(priv->mnl_socket, ifindex,
-						&error);
+			err = mlx5_flow_tcf_ifindex_init(&priv->tcf_socket,
+							 ifindex, &error);
 		}
 		if (err) {
 			DRV_LOG(WARNING,
 				"flow rules relying on switch offloads will"
 				" not be supported: %s: %s",
 				error.message, strerror(rte_errno));
-			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
-			priv->mnl_socket = NULL;
+			mlx5_flow_tcf_socket_close(&priv->tcf_socket);
 		}
 	}
 	TAILQ_INIT(&priv->flows);
@@ -1218,8 +1217,7 @@
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
-		if (priv->mnl_socket)
-			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+		mlx5_flow_tcf_socket_close(&priv->tcf_socket);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8de0d74..b327a39 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -160,6 +160,11 @@ struct mlx5_drop {
 
 struct mnl_socket;
 
+struct mlx5_tcf_socket {
+	uint32_t seq; /* Message sequence number. */
+	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+};
+
 struct priv {
 	LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
 	struct rte_eth_dev_data *dev_data;  /* Pointer to device data. */
@@ -220,12 +225,12 @@ struct priv {
 	int nl_socket_rdma; /* Netlink socket (NETLINK_RDMA). */
 	int nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */
 	uint32_t nl_sn; /* Netlink message sequence number. */
+	struct mlx5_tcf_socket tcf_socket; /* Libmnl socket for tcf. */
 #ifndef RTE_ARCH_64
 	rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */
 	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
 	/* UAR same-page access control required in 32bit implementations. */
 #endif
-	struct mnl_socket *mnl_socket; /* Libmnl socket. */
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 2d56ced..fff905a 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -348,9 +348,10 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 
 /* mlx5_flow_tcf.c */
 
-int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-		       struct rte_flow_error *error);
-struct mnl_socket *mlx5_flow_tcf_socket_create(void);
-void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
+int mlx5_flow_tcf_ifindex_init(struct mlx5_tcf_socket *tcf,
+			       unsigned int ifindex,
+			       struct rte_flow_error *error);
+int mlx5_flow_tcf_socket_open(struct mlx5_tcf_socket *tcf);
+void mlx5_flow_tcf_socket_close(struct mlx5_tcf_socket *tcf);
 
 #endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 5c93412..15e250c 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -1552,8 +1552,8 @@ struct flow_tcf_ptoi {
 /**
  * Send Netlink message with acknowledgment.
  *
- * @param nl
- *   Libmnl socket to use.
+ * @param tcf
+ *   Libmnl socket context to use.
  * @param nlh
  *   Message to send. This function always raises the NLM_F_ACK flag before
  *   sending.
@@ -1562,26 +1562,108 @@ struct flow_tcf_ptoi {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+flow_tcf_nl_ack(struct mlx5_tcf_socket *tcf, struct nlmsghdr *nlh)
 {
 	alignas(struct nlmsghdr)
-	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
-		    nlh->nlmsg_len - sizeof(*nlh)];
-	uint32_t seq = random();
-	int ret;
-
+	uint8_t ans[MNL_SOCKET_BUFFER_SIZE];
+	unsigned int portid = mnl_socket_get_portid(tcf->nl);
+	uint32_t seq = tcf->seq++;
+	struct mnl_socket *nl = tcf->nl;
+	int err, ret;
+
+	assert(nl);
+	if (!seq)
+		seq = tcf->seq++;
 	nlh->nlmsg_flags |= NLM_F_ACK;
 	nlh->nlmsg_seq = seq;
 	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
-	if (ret != -1)
-		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
-	if (ret != -1)
-		ret = mnl_cb_run
-			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
+	err = (ret <= 0) ? -errno : 0;
+	nlh = (struct nlmsghdr *)ans;
+	/*
+	 * The following loop postpones non-fatal errors until multipart
+	 * messages are complete.
+	 */
 	if (ret > 0)
+		while (true) {
+			ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
+			if (ret < 0) {
+				err = errno;
+				if (err != ENOSPC)
+					break;
+			}
+			if (!err) {
+				ret = mnl_cb_run(nlh, ret, seq, portid,
+						 NULL, NULL);
+				if (ret < 0) {
+					err = errno;
+					break;
+				}
+			}
+			/* Keep receiving until the multipart message ends. */
+			if (!(nlh->nlmsg_flags & NLM_F_MULTI) ||
+			      nlh->nlmsg_type == NLMSG_DONE)
+				break;
+		}
+	if (!err)
 		return 0;
-	rte_errno = errno;
-	return -rte_errno;
+	rte_errno = err;
+	return -err;
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param tcf
+ *   Libmnl socket context object.
+ * @param ifindex
+ *   Index of network interface to initialize.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_tcf_ifindex_init(struct mlx5_tcf_socket *tcf, unsigned int ifindex,
+		   struct rte_flow_error *error)
+{
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+	alignas(struct nlmsghdr)
+	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
+
+	/* Destroy existing ingress qdisc and everything attached to it. */
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = RTM_DELQDISC;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = ifindex;
+	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	tcm->tcm_parent = TC_H_INGRESS;
+	/* Ignore errors when qdisc is already absent. */
+	if (flow_tcf_nl_ack(tcf, nlh) &&
+	    rte_errno != EINVAL && rte_errno != ENOENT)
+		return rte_flow_error_set(error, rte_errno,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "netlink: failed to remove ingress"
+					  " qdisc");
+	/* Create fresh ingress qdisc. */
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = RTM_NEWQDISC;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = ifindex;
+	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	tcm->tcm_parent = TC_H_INGRESS;
+	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
+	if (flow_tcf_nl_ack(tcf, nlh))
+		return rte_flow_error_set(error, rte_errno,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "netlink: failed to create ingress"
+					  " qdisc");
+	return 0;
 }
 
 /**
@@ -1602,18 +1684,25 @@ struct flow_tcf_ptoi {
 	       struct rte_flow_error *error)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_tcf_socket *tcf = &priv->tcf_socket;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
+	int ret;
 
 	dev_flow = LIST_FIRST(&flow->dev_flows);
 	/* E-Switch flow can't be expanded. */
 	assert(!LIST_NEXT(dev_flow, next));
+	if (dev_flow->tcf.applied)
+		return 0;
 	nlh = dev_flow->tcf.nlh;
 	nlh->nlmsg_type = RTM_NEWTFILTER;
 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	if (!flow_tcf_nl_ack(nl, nlh))
+	ret = flow_tcf_nl_ack(tcf, nlh);
+	if (!ret) {
+		dev_flow->tcf.applied = 1;
 		return 0;
+	}
+	DRV_LOG(WARNING, "Failed to create TC rule (%d)", rte_errno);
 	return rte_flow_error_set(error, rte_errno,
 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
 				  "netlink: failed to create TC flow rule");
@@ -1631,7 +1720,7 @@ struct flow_tcf_ptoi {
 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_tcf_socket *tcf = &priv->tcf_socket;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -1645,7 +1734,8 @@ struct flow_tcf_ptoi {
 	nlh = dev_flow->tcf.nlh;
 	nlh->nlmsg_type = RTM_DELTFILTER;
 	nlh->nlmsg_flags = NLM_F_REQUEST;
-	flow_tcf_nl_ack(nl, nlh);
+	flow_tcf_nl_ack(tcf, nlh);
+	dev_flow->tcf.applied = 0;
 }
 
 /**
@@ -1683,93 +1773,45 @@ struct flow_tcf_ptoi {
 };
 
 /**
- * Initialize ingress qdisc of a given network interface.
- *
- * @param nl
- *   Libmnl socket of the @p NETLINK_ROUTE kind.
- * @param ifindex
- *   Index of network interface to initialize.
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
+ * Create and configure a libmnl socket for Netlink flow rules.
  *
+ * @param tcf
+ *   tcf socket object to be initialized by function.
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-		   struct rte_flow_error *error)
-{
-	struct nlmsghdr *nlh;
-	struct tcmsg *tcm;
-	alignas(struct nlmsghdr)
-	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
-
-	/* Destroy existing ingress qdisc and everything attached to it. */
-	nlh = mnl_nlmsg_put_header(buf);
-	nlh->nlmsg_type = RTM_DELQDISC;
-	nlh->nlmsg_flags = NLM_F_REQUEST;
-	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm_ifindex = ifindex;
-	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	tcm->tcm_parent = TC_H_INGRESS;
-	/* Ignore errors when qdisc is already absent. */
-	if (flow_tcf_nl_ack(nl, nlh) &&
-	    rte_errno != EINVAL && rte_errno != ENOENT)
-		return rte_flow_error_set(error, rte_errno,
-					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-					  "netlink: failed to remove ingress"
-					  " qdisc");
-	/* Create fresh ingress qdisc. */
-	nlh = mnl_nlmsg_put_header(buf);
-	nlh->nlmsg_type = RTM_NEWQDISC;
-	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm_ifindex = ifindex;
-	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	tcm->tcm_parent = TC_H_INGRESS;
-	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
-	if (flow_tcf_nl_ack(nl, nlh))
-		return rte_flow_error_set(error, rte_errno,
-					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-					  "netlink: failed to create ingress"
-					  " qdisc");
-	return 0;
-}
-
-/**
- * Create and configure a libmnl socket for Netlink flow rules.
- *
- * @return
- *   A valid libmnl socket object pointer on success, NULL otherwise and
- *   rte_errno is set.
- */
-struct mnl_socket *
-mlx5_flow_tcf_socket_create(void)
+mlx5_flow_tcf_socket_open(struct mlx5_tcf_socket *tcf)
 {
 	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
 
+	tcf->nl = NULL;
 	if (nl) {
 		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
 				      sizeof(int));
-		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-			return nl;
+		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID)) {
+			tcf->nl = nl;
+			tcf->seq = random();
+			return 0;
+		}
 	}
 	rte_errno = errno;
 	if (nl)
 		mnl_socket_close(nl);
-	return NULL;
+	return -rte_errno;
 }
 
 /**
- * Destroy a libmnl socket.
+ * Destroy the tcf object (close the MNL socket).
  *
- * @param nl
- *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ * @param tcf
+ *   tcf socket object to be destroyed by function.
  */
 void
-mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+mlx5_flow_tcf_socket_close(struct mlx5_tcf_socket *tcf)
 {
-	mnl_socket_close(nl);
+	if (tcf->nl) {
+		mnl_socket_close(tcf->nl);
+		tcf->nl = NULL;
+	}
 }
-- 
1.8.3.1



More information about the dev mailing list