[dpdk-dev] [PATCH v2 7/7] net/mlx5: e-switch VXLAN rule cleanup routines

Viacheslav Ovsiienko viacheslavo at mellanox.com
Mon Oct 15 16:13:35 CEST 2018


The last part of the patchset contains the rule cleanup routines.
These are part of the outer interface initialization performed at
the moment of VXLAN VTEP attachment. The routines query the list of
attached VXLAN devices, the list of local IP addresses with the peer
and link scope attributes, and the list of permanent neigh rules;
all of the above items found on the specified outer device are then
flushed.

Suggested-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
---
 drivers/net/mlx5/mlx5_flow_tcf.c | 505 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 499 insertions(+), 6 deletions(-)
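Note for reviewers (illustrative only, not part of the commit message or
the diff): every cleanup helper below follows the same libmnl pattern of
sending an RTM_GET* dump request, letting a per-message callback batch up
RTM_DEL* commands, and flushing the batch with flow_tcf_send_nlcmd(). The
stand-alone sketch here shows only the bare dump/callback skeleton that
pattern builds on; it assumes libmnl is installed (link with -lmnl) and
omits error handling, whereas the driver itself works through its own
mlx5_flow_tcf_context and flow_tcf_nl_ack() wrapper rather than a raw
mnl_socket.

#include <stdio.h>
#include <time.h>
#include <sys/socket.h>
#include <libmnl/libmnl.h>
#include <linux/rtnetlink.h>

/* Callback invoked by mnl_cb_run() for every RTM_NEWLINK reply. */
static int link_dump_cb(const struct nlmsghdr *nlh, void *arg)
{
	const struct ifinfomsg *ifm = mnl_nlmsg_get_payload(nlh);

	(void)arg;
	printf("ifindex %d\n", ifm->ifi_index);
	return MNL_CB_OK;
}

int main(void)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	/* Error handling omitted for brevity. */
	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct ifinfomsg *ifm;
	unsigned int seq = time(NULL);
	unsigned int portid;
	ssize_t len;
	int ret;

	/* Build the RTM_GETLINK dump request. */
	nlh->nlmsg_type = RTM_GETLINK;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	nlh->nlmsg_seq = seq;
	ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
	ifm->ifi_family = AF_UNSPEC;
	mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
	portid = mnl_socket_get_portid(nl);
	mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	/* Run the callback over every reply message until NLMSG_DONE. */
	while ((len = mnl_socket_recvfrom(nl, buf, sizeof(buf))) > 0) {
		ret = mnl_cb_run(buf, len, seq, portid, link_dump_cb, NULL);
		if (ret <= MNL_CB_STOP)
			break;
	}
	mnl_socket_close(nl);
	return 0;
}

In the driver, the collect callbacks additionally build the corresponding
delete commands into the tcf_nlcb_buf list instead of printing.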

diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index a1d7733..a3348ea 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -4012,6 +4012,502 @@ static LIST_HEAD(, mlx5_flow_tcf_vtep)
 }
 #endif /* HAVE_IFLA_VXLAN_COLLECT_METADATA */
 
+#define MNL_REQUEST_SIZE_MIN 256
+#define MNL_REQUEST_SIZE_MAX 2048
+#define MNL_REQUEST_SIZE RTE_MIN(RTE_MAX(sysconf(_SC_PAGESIZE), \
+				 MNL_REQUEST_SIZE_MIN), MNL_REQUEST_SIZE_MAX)
+
+/* Data structures used by flow_tcf_xxx_cb() routines. */
+struct tcf_nlcb_buf {
+	LIST_ENTRY(tcf_nlcb_buf) next;
+	uint32_t size;
+	alignas(struct nlmsghdr)
+	uint8_t msg[]; /**< Netlink message data. */
+};
+
+struct tcf_nlcb_context {
+	unsigned int ifindex; /**< Base interface index. */
+	uint32_t bufsize;
+	LIST_HEAD(, tcf_nlcb_buf) nlbuf;
+};
+
+/**
+ * Allocate space for a netlink command in the buffer list.
+ *
+ * @param[in, out] ctx
+ *   Pointer to callback context with command buffers list.
+ * @param[in] size
+ *   Required size of data buffer to be allocated.
+ *
+ * @return
+ *   Pointer to allocated memory, aligned as message header.
+ *   NULL if some error occurred.
+ */
+static struct nlmsghdr *
+flow_tcf_alloc_nlcmd(struct tcf_nlcb_context *ctx, uint32_t size)
+{
+	struct tcf_nlcb_buf *buf;
+	struct nlmsghdr *nlh;
+
+	size = NLMSG_ALIGN(size);
+	buf = LIST_FIRST(&ctx->nlbuf);
+	if (buf && (buf->size + size) <= ctx->bufsize) {
+		nlh = (struct nlmsghdr *)&buf->msg[buf->size];
+		buf->size += size;
+		return nlh;
+	}
+	if (size > ctx->bufsize) {
+		DRV_LOG(WARNING, "netlink: too long command buffer requested");
+		return NULL;
+	}
+	buf = rte_malloc(__func__,
+			ctx->bufsize + sizeof(struct tcf_nlcb_buf),
+			alignof(struct tcf_nlcb_buf));
+	if (!buf) {
+		DRV_LOG(WARNING, "netlink: no memory for command buffer");
+		return NULL;
+	}
+	LIST_INSERT_HEAD(&ctx->nlbuf, buf, next);
+	buf->size = size;
+	nlh = (struct nlmsghdr *)&buf->msg[0];
+	return nlh;
+}
+
+/**
+ * Set the NLM_F_ACK flag in the last netlink command in the buffer.
+ * Only the last command in the buffer will be acked by the system.
+ *
+ * @param[in, out] buf
+ *   Pointer to buffer with netlink commands.
+ */
+static void
+flow_tcf_setack_nlcmd(struct tcf_nlcb_buf *buf)
+{
+	struct nlmsghdr *nlh;
+	uint32_t size = 0;
+
+	assert(buf->size);
+	do {
+		nlh = (struct nlmsghdr *)&buf->msg[size];
+		size += NLMSG_ALIGN(nlh->nlmsg_len);
+		if (size >= buf->size) {
+			nlh->nlmsg_flags |= NLM_F_ACK;
+			break;
+		}
+	} while (true);
+}
+
+/**
+ * Send the buffers with prepared netlink commands. Scans the list and
+ * sends all found buffers. Buffers are sent and freed in any case in
+ * order to prevent memory leakage even if an error occurs for some message.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_context_create().
+ * @param[in, out] ctx
+ *   Pointer to callback context with command buffers list.
+ *
+ * @return
+ *   Zero value on success, negative errno value otherwise
+ *   and rte_errno is set.
+ */
+static int
+flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context *tcf,
+		    struct tcf_nlcb_context *ctx)
+{
+	struct tcf_nlcb_buf *bc, *bn;
+	struct nlmsghdr *nlh;
+	int ret = 0;
+
+	bc = LIST_FIRST(&ctx->nlbuf);
+	while (bc) {
+		int rc;
+
+		bn = LIST_NEXT(bc, next);
+		if (bc->size) {
+			flow_tcf_setack_nlcmd(bc);
+			nlh = (struct nlmsghdr *)&bc->msg;
+			rc = flow_tcf_nl_ack(tcf, nlh, bc->size, NULL, NULL);
+			if (rc && !ret)
+				ret = rc;
+		}
+		rte_free(bc);
+		bc = bn;
+	}
+	LIST_INIT(&ctx->nlbuf);
+	return ret;
+}
+
+/**
+ * Collect local IP address rules with the scope link attribute on the
+ * specified network device. This is a callback routine called by libmnl
+ * mnl_cb_run() in a loop for every message in the received packet.
+ *
+ * @param[in] nlh
+ *   Pointer to reply header.
+ * @param[in, out] arg
+ *   Opaque data pointer for this callback.
+ *
+ * @return
+ *   A positive, nonzero value on success, negative errno value otherwise
+ *   and rte_errno is set.
+ */
+static int
+flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg)
+{
+	struct tcf_nlcb_context *ctx = arg;
+	struct nlmsghdr *cmd;
+	struct ifaddrmsg *ifa;
+	struct nlattr *na;
+	struct nlattr *na_local = NULL;
+	struct nlattr *na_peer = NULL;
+	unsigned char family;
+
+	if (nlh->nlmsg_type != RTM_NEWADDR) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	ifa = mnl_nlmsg_get_payload(nlh);
+	family = ifa->ifa_family;
+	if (ifa->ifa_index != ctx->ifindex ||
+	    ifa->ifa_scope != RT_SCOPE_LINK ||
+	    !(ifa->ifa_flags & IFA_F_PERMANENT) ||
+	    (family != AF_INET && family != AF_INET6))
+		return 1;
+	mnl_attr_for_each(na, nlh, sizeof(*ifa)) {
+		switch (mnl_attr_get_type(na)) {
+		case IFA_LOCAL:
+			na_local = na;
+			break;
+		case IFA_ADDRESS:
+			na_peer = na;
+			break;
+		}
+		if (na_local && na_peer)
+			break;
+	}
+	if (!na_local || !na_peer)
+		return 1;
+	/* Local rule found with scope link, permanent and assigned peer. */
+	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
+					MNL_ALIGN(sizeof(struct ifaddrmsg)) +
+					(family == AF_INET6
+					? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
+					: 2 * SZ_NLATTR_TYPE_OF(uint32_t)));
+	if (!cmd) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	cmd = mnl_nlmsg_put_header(cmd);
+	cmd->nlmsg_type = RTM_DELADDR;
+	cmd->nlmsg_flags = NLM_F_REQUEST;
+	ifa = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifa));
+	ifa->ifa_flags = IFA_F_PERMANENT;
+	ifa->ifa_scope = RT_SCOPE_LINK;
+	ifa->ifa_index = ctx->ifindex;
+	if (family == AF_INET) {
+		ifa->ifa_family = AF_INET;
+		ifa->ifa_prefixlen = 32;
+		mnl_attr_put_u32(cmd, IFA_LOCAL, mnl_attr_get_u32(na_local));
+		mnl_attr_put_u32(cmd, IFA_ADDRESS, mnl_attr_get_u32(na_peer));
+	} else {
+		ifa->ifa_family = AF_INET6;
+		ifa->ifa_prefixlen = 128;
+		mnl_attr_put(cmd, IFA_LOCAL, IPV6_ADDR_LEN,
+			mnl_attr_get_payload(na_local));
+		mnl_attr_put(cmd, IFA_ADDRESS, IPV6_ADDR_LEN,
+			mnl_attr_get_payload(na_peer));
+	}
+	return 1;
+}
+
+/**
+ * Clean up the local IP addresses on the outer interface.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_context_create().
+ * @param[in] ifindex
+ *   Network interface index to perform the cleanup.
+ */
+static void
+flow_tcf_encap_local_cleanup(struct mlx5_flow_tcf_context *tcf,
+			    unsigned int ifindex)
+{
+	struct nlmsghdr *nlh;
+	struct ifaddrmsg *ifa;
+	struct tcf_nlcb_context ctx = {
+		.ifindex = ifindex,
+		.bufsize = MNL_REQUEST_SIZE,
+		.nlbuf = LIST_HEAD_INITIALIZER(),
+	};
+	int ret;
+
+	assert(ifindex);
+	/*
+	 * Seek and destroy leftovers of local IP addresses with
+	 * matching properties "scope link".
+	 */
+	nlh = mnl_nlmsg_put_header(tcf->buf);
+	nlh->nlmsg_type = RTM_GETADDR;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	ifa = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifa));
+	ifa->ifa_family = AF_UNSPEC;
+	ifa->ifa_index = ifindex;
+	ifa->ifa_scope = RT_SCOPE_LINK;
+	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_local_cb, &ctx);
+	if (ret)
+		DRV_LOG(WARNING, "netlink: query device list error %d", ret);
+	ret = flow_tcf_send_nlcmd(tcf, &ctx);
+	if (ret)
+		DRV_LOG(WARNING, "netlink: device delete error %d", ret);
+}
+
+/**
+ * Collect permanent neigh rules on the specified network device.
+ * This is a callback routine called by libmnl mnl_cb_run() in a loop
+ * for every message in the received packet.
+ *
+ * @param[in] nlh
+ *   Pointer to reply header.
+ * @param[in, out] arg
+ *   Opaque data pointer for this callback.
+ *
+ * @return
+ *   A positive, nonzero value on success, negative errno value otherwise
+ *   and rte_errno is set.
+ */
+static int
+flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg)
+{
+	struct tcf_nlcb_context *ctx = arg;
+	struct nlmsghdr *cmd;
+	struct ndmsg *ndm;
+	struct nlattr *na;
+	struct nlattr *na_ip = NULL;
+	struct nlattr *na_mac = NULL;
+	unsigned char family;
+
+	if (nlh->nlmsg_type != RTM_NEWNEIGH) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	ndm = mnl_nlmsg_get_payload(nlh);
+	family = ndm->ndm_family;
+	if (ndm->ndm_ifindex != (int)ctx->ifindex ||
+	   !(ndm->ndm_state & NUD_PERMANENT) ||
+	   (family != AF_INET && family != AF_INET6))
+		return 1;
+	mnl_attr_for_each(na, nlh, sizeof(*ndm)) {
+		switch (mnl_attr_get_type(na)) {
+		case NDA_DST:
+			na_ip = na;
+			break;
+		case NDA_LLADDR:
+			na_mac = na;
+			break;
+		}
+		if (na_mac && na_ip)
+			break;
+	}
+	if (!na_mac || !na_ip)
+		return 1;
+	/* Neigh rule with permanent attribute found. */
+	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
+					MNL_ALIGN(sizeof(struct ndmsg)) +
+					SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) +
+					(family == AF_INET6
+					? SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
+					: SZ_NLATTR_TYPE_OF(uint32_t)));
+	if (!cmd) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	cmd = mnl_nlmsg_put_header(cmd);
+	cmd->nlmsg_type = RTM_DELNEIGH;
+	cmd->nlmsg_flags = NLM_F_REQUEST;
+	ndm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ndm));
+	ndm->ndm_ifindex = ctx->ifindex;
+	ndm->ndm_state = NUD_PERMANENT;
+	ndm->ndm_flags = 0;
+	ndm->ndm_type = 0;
+	if (family == AF_INET) {
+		ndm->ndm_family = AF_INET;
+		mnl_attr_put_u32(cmd, NDA_DST, mnl_attr_get_u32(na_ip));
+	} else {
+		ndm->ndm_family = AF_INET6;
+		mnl_attr_put(cmd, NDA_DST, IPV6_ADDR_LEN,
+			     mnl_attr_get_payload(na_ip));
+	}
+	mnl_attr_put(cmd, NDA_LLADDR, ETHER_ADDR_LEN,
+		     mnl_attr_get_payload(na_mac));
+	return 1;
+}
+
+/**
+ * Clean up the neigh rules on the outer interface.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_context_create().
+ * @param[in] ifindex
+ *   Network interface index to perform the cleanup.
+ */
+static void
+flow_tcf_encap_neigh_cleanup(struct mlx5_flow_tcf_context *tcf,
+			    unsigned int ifindex)
+{
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+	struct tcf_nlcb_context ctx = {
+		.ifindex = ifindex,
+		.bufsize = MNL_REQUEST_SIZE,
+		.nlbuf = LIST_HEAD_INITIALIZER(),
+	};
+	int ret;
+
+	assert(ifindex);
+	/* Seek and destroy leftovers of neigh rules. */
+	nlh = mnl_nlmsg_put_header(tcf->buf);
+	nlh->nlmsg_type = RTM_GETNEIGH;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	ndm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ndm));
+	ndm->ndm_family = AF_UNSPEC;
+	ndm->ndm_ifindex = ifindex;
+	ndm->ndm_state = NUD_PERMANENT;
+	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_neigh_cb, &ctx);
+	if (ret)
+		DRV_LOG(WARNING, "netlink: query device list error %d", ret);
+	ret = flow_tcf_send_nlcmd(tcf, &ctx);
+	if (ret)
+		DRV_LOG(WARNING, "netlink: device delete error %d", ret);
+}
+
+/**
+ * Collect indices of VXLAN encap/decap interfaces associated with the
+ * device. This is a callback routine called by libmnl mnl_cb_run() in
+ * a loop for every message in the received packet.
+ *
+ * @param[in] nlh
+ *   Pointer to reply header.
+ * @param[in, out] arg
+ *   Opaque data pointer for this callback.
+ *
+ * @return
+ *   A positive, nonzero value on success, negative errno value otherwise
+ *   and rte_errno is set.
+ */
+static int
+flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg)
+{
+	struct tcf_nlcb_context *ctx = arg;
+	struct nlmsghdr *cmd;
+	struct ifinfomsg *ifm;
+	struct nlattr *na;
+	struct nlattr *na_info = NULL;
+	struct nlattr *na_vxlan = NULL;
+	bool found = false;
+	unsigned int vxindex;
+
+	if (nlh->nlmsg_type != RTM_NEWLINK) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	ifm = mnl_nlmsg_get_payload(nlh);
+	if (!ifm->ifi_index) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	mnl_attr_for_each(na, nlh, sizeof(*ifm))
+		if (mnl_attr_get_type(na) == IFLA_LINKINFO) {
+			na_info = na;
+			break;
+		}
+	if (!na_info)
+		return 1;
+	mnl_attr_for_each_nested(na, na_info) {
+		switch (mnl_attr_get_type(na)) {
+		case IFLA_INFO_KIND:
+			if (!strncmp("vxlan", mnl_attr_get_str(na),
+				     mnl_attr_get_len(na)))
+				found = true;
+			break;
+		case IFLA_INFO_DATA:
+			na_vxlan = na;
+			break;
+		}
+		if (found && na_vxlan)
+			break;
+	}
+	if (!found || !na_vxlan)
+		return 1;
+	found = false;
+	mnl_attr_for_each_nested(na, na_vxlan) {
+		if (mnl_attr_get_type(na) == IFLA_VXLAN_LINK &&
+		    mnl_attr_get_u32(na) == ctx->ifindex) {
+			found = true;
+			break;
+		}
+	}
+	if (!found)
+		return 1;
+	/* Attached VXLAN device found, store the command to delete. */
+	vxindex = ifm->ifi_index;
+	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
+					MNL_ALIGN(sizeof(struct ifinfomsg)));
+	if (!cmd) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	cmd = mnl_nlmsg_put_header(cmd);
+	cmd->nlmsg_type = RTM_DELLINK;
+	cmd->nlmsg_flags = NLM_F_REQUEST;
+	ifm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifm));
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->ifi_index = vxindex;
+	return 1;
+}
+
+/**
+ * Clean up the outer interface. Removes all found VXLAN devices
+ * attached to the specified index and flushes the neigh and local IP
+ * databases.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_context_create().
+ * @param[in] ifindex
+ *   Network interface index to perform the cleanup.
+ */
+static void
+flow_tcf_encap_iface_cleanup(struct mlx5_flow_tcf_context *tcf,
+			    unsigned int ifindex)
+{
+	struct nlmsghdr *nlh;
+	struct ifinfomsg *ifm;
+	struct tcf_nlcb_context ctx = {
+		.ifindex = ifindex,
+		.bufsize = MNL_REQUEST_SIZE,
+		.nlbuf = LIST_HEAD_INITIALIZER(),
+	};
+	int ret;
+
+	assert(ifindex);
+	/*
+	 * Seek and destroy leftover VXLAN encap/decap interfaces with
+	 * matching properties.
+	 */
+	nlh = mnl_nlmsg_put_header(tcf->buf);
+	nlh->nlmsg_type = RTM_GETLINK;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
+	ifm->ifi_family = AF_UNSPEC;
+	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_vxlan_cb, &ctx);
+	if (ret)
+		DRV_LOG(WARNING, "netlink: query device list error %d", ret);
+	ret = flow_tcf_send_nlcmd(tcf, &ctx);
+	if (ret)
+		DRV_LOG(WARNING, "netlink: device delete error %d", ret);
+}
+
+
 /**
  * Create target interface index for VXLAN tunneling decapsulation.
  * In order to share the UDP port within the other interfaces the
@@ -4100,12 +4596,9 @@ static LIST_HEAD(, mlx5_flow_tcf_vtep)
 		uint16_t pcnt;
 
 		/* Not found, we should create the new attached VTEP. */
-/*
- * TODO: not implemented yet
- * flow_tcf_encap_iface_cleanup(tcf, ifouter);
- * flow_tcf_encap_local_cleanup(tcf, ifouter);
- * flow_tcf_encap_neigh_cleanup(tcf, ifouter);
- */
+		flow_tcf_encap_iface_cleanup(tcf, ifouter);
+		flow_tcf_encap_local_cleanup(tcf, ifouter);
+		flow_tcf_encap_neigh_cleanup(tcf, ifouter);
 		for (pcnt = 0; pcnt <= (MLX5_VXLAN_PORT_RANGE_MAX
 				     - MLX5_VXLAN_PORT_RANGE_MIN); pcnt++) {
 			encap_port++;
-- 
1.8.3.1


