[dpdk-dev] [PATCH v3] net/mlx5: add workaround for VLAN in virtual machine

Viacheslav Ovsiienko viacheslavo at mellanox.com
Tue Jul 30 11:20:24 CEST 2019


On some virtual setups (particularly on ESXi) when we have SR-IOV and
E-Switch enabled there is the problem to receive VLAN traffic on VF
interfaces. The NIC driver in ESXi hypervisor does not setup E-Switch
vport setting correctly and VLAN traffic targeted to VF is dropped.

The patch provides the temporary workaround - if the rule
containing the VLAN pattern is being installed for VF the VLAN
network interface over VF is created, like the command does:

  ip link add link vf.if name mlx5.wa.1.100 type vlan id 100

The PMD in DPDK maintains the database of created VLAN interfaces
for each existing VF and requested VLAN tags. When all of the RTE
Flows using the given VLAN tag are removed the created VLAN interface
with this VLAN tag is deleted.

The name of created VLAN interface follows the format:

  evmlx.d1.d2, where d1 is VF interface ifindex, d2 - VLAN ifindex

Implementation limitations:

- mask in rules is ignored, rule must specify VLAN tags exactly,
  no wildcards (which are implemented by the masks) are allowed

- virtual environment is detected via rte_hypervisor() call,
  and the type of hypervisor is checked. Currently we engage
  the workaround for ESXi and unrecognized hypervisors (which
  always happen on platforms other than x86 - it means workaround
  applied for the Flow over PCI VF). There are no confirmed data
  the other hypervisors (HyperV, Qemu) need this workaround,
  we are trying to reduce the list of configurations on those
  workaround should be applied.
  
Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
Acked-by: Matan Azrad <matan at mellanox.com>

---
v3: - address the comments, workaround engaged on ESXi and
      unrecognized hypervisors only. 
v2: - http://patches.dpdk.org/patch/57257/ 
    - rebase
v1: - http://patches.dpdk.org/patch/56450/
---
 drivers/net/mlx5/mlx5.c            |   6 +
 drivers/net/mlx5/mlx5.h            |  33 +++++
 drivers/net/mlx5/mlx5_flow.c       |  22 +++
 drivers/net/mlx5/mlx5_flow.h       |   5 +
 drivers/net/mlx5/mlx5_flow_dv.c    |  33 ++++-
 drivers/net/mlx5/mlx5_flow_verbs.c |  25 +++-
 drivers/net/mlx5/mlx5_nl.c         | 294 +++++++++++++++++++++++++++++++++++++
 7 files changed, 414 insertions(+), 4 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 3a345c7..f5bc31f 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -843,6 +843,8 @@ struct mlx5_dev_spawn_data {
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
+	if (priv->vmwa_context)
+		mlx5_vlan_vmwa_exit(priv->vmwa_context);
 	if (priv->sh) {
 		/*
 		 * Free the shared context in last turn, because the cleanup
@@ -1990,6 +1992,8 @@ struct mlx5_dev_spawn_data {
 	mlx5_set_min_inline(spawn, &config);
 	/* Store device configuration on private structure. */
 	priv->config = config;
+	/* Create context for virtual machine VLAN workaround. */
+	priv->vmwa_context = mlx5_vlan_vmwa_init(eth_dev, spawn->ifindex);
 	if (config.dv_flow_en) {
 		err = mlx5_alloc_shared_dr(priv);
 		if (err)
@@ -2016,6 +2020,8 @@ struct mlx5_dev_spawn_data {
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
+		if (priv->vmwa_context)
+			mlx5_vlan_vmwa_exit(priv->vmwa_context);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e812374..caf2491 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -355,6 +355,30 @@ enum mlx5_verbs_alloc_type {
 	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
 };
 
+/* VLAN netdev for VLAN workaround. */
+struct mlx5_vlan_dev {
+	uint32_t refcnt;
+	uint32_t ifindex; /**< Own interface index. */
+};
+
+/* Structure for VF VLAN workaround. */
+struct mlx5_vf_vlan {
+	uint32_t tag:12;
+	uint32_t created:1;
+};
+
+/*
+ * Array of VLAN devices created on the base of VF
+ * used for workaround in virtual environments.
+ */
+struct mlx5_vlan_vmwa_context {
+	int nl_socket;
+	uint32_t nl_sn;
+	uint32_t vf_ifindex;
+	struct rte_eth_dev *dev;
+	struct mlx5_vlan_dev vlan_dev[4096];
+};
+
 /**
  * Verbs allocator needs a context to know in the callback which kind of
  * resources it is allocating.
@@ -631,6 +655,7 @@ struct mlx5_priv {
 	int nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */
 	uint32_t nl_sn; /* Netlink message sequence number. */
 	LIST_HEAD(dbrpage, mlx5_devx_dbr_page) dbrpgs; /* Door-bell pages. */
+	struct mlx5_vlan_vmwa_context *vmwa_context; /* VLAN WA context. */
 #ifndef RTE_ARCH_64
 	rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */
 	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
@@ -830,6 +855,14 @@ int mlx5_nl_mac_addr_remove(struct rte_eth_dev *dev, struct rte_ether_addr *mac,
 int mlx5_nl_switch_info(int nl, unsigned int ifindex,
 			struct mlx5_switch_info *info);
 
+struct mlx5_vlan_vmwa_context *mlx5_vlan_vmwa_init(struct rte_eth_dev *dev,
+						   uint32_t ifindex);
+void mlx5_vlan_vmwa_exit(struct mlx5_vlan_vmwa_context *ctx);
+void mlx5_vlan_vmwa_release(struct rte_eth_dev *dev,
+			    struct mlx5_vf_vlan *vf_vlan);
+void mlx5_vlan_vmwa_acquire(struct rte_eth_dev *dev,
+			    struct mlx5_vf_vlan *vf_vlan);
+
 /* mlx5_devx_cmds.c */
 
 struct mlx5_devx_obj *mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx,
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 3d2d5fc..f40fee5 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1204,6 +1204,8 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
  *   Item specification.
  * @param[in] item_flags
  *   Bit-fields that holds the items detected until now.
+ * @param[in] dev
+ *   Ethernet device flow is being created on.
  * @param[out] error
  *   Pointer to error structure.
  *
@@ -1213,6 +1215,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 int
 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
 			     uint64_t item_flags,
+			     struct rte_eth_dev *dev,
 			     struct rte_flow_error *error)
 {
 	const struct rte_flow_item_vlan *spec = item->spec;
@@ -1247,6 +1250,25 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 					error);
 	if (ret)
 		return ret;
+	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
+		struct mlx5_priv *priv = dev->data->dev_private;
+
+		if (priv->vmwa_context) {
+			/*
+			 * Non-NULL context means we have a virtual machine
+			 * and SR-IOV enabled, we have to create VLAN interface
+			 * to make hypervisor to setup E-Switch vport
+			 * context correctly. We avoid creating the multiple
+			 * VLAN interfaces, so we cannot support VLAN tag mask.
+			 */
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  item,
+						  "VLAN tag mask is not"
+						  " supported in virtual"
+						  " environment");
+		}
+	}
 	if (spec) {
 		vlan_tag = spec->tci;
 		vlan_tag &= mask->tci;
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 24da74b..822ff36 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -330,6 +330,8 @@ struct mlx5_flow_dv {
 	/**< Pointer to the jump action resource. */
 	struct mlx5_flow_dv_port_id_action_resource *port_id_action;
 	/**< Pointer to port ID action resource. */
+	struct mlx5_vf_vlan vf_vlan;
+	/**< Structure for VF VLAN workaround. */
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	void *actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
 	/**< Action list. */
@@ -355,6 +357,8 @@ struct mlx5_flow_verbs {
 	struct ibv_flow *flow; /**< Verbs flow pointer. */
 	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
 	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
+	struct mlx5_vf_vlan vf_vlan;
+	/**< Structure for VF VLAN workaround. */
 };
 
 /** Device flow structure. */
@@ -505,6 +509,7 @@ int mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
 				struct rte_flow_error *error);
 int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
 				 uint64_t item_flags,
+				 struct rte_eth_dev *dev,
 				 struct rte_flow_error *error);
 int mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
 				  uint64_t item_flags,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 59ef716..9c0a261 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -2892,7 +2892,7 @@ struct field_modify_info modify_tcp[] = {
 			break;
 		case RTE_FLOW_ITEM_TYPE_VLAN:
 			ret = mlx5_flow_validate_item_vlan(items, item_flags,
-							   error);
+							   dev, error);
 			if (ret < 0)
 				return ret;
 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
@@ -3450,6 +3450,8 @@ struct field_modify_info modify_tcp[] = {
 /**
  * Add VLAN item to matcher and to the value.
  *
+ * @param[in, out] dev_flow
+ *   Flow descriptor.
  * @param[in, out] matcher
  *   Flow matcher.
  * @param[in, out] key
@@ -3460,7 +3462,8 @@ struct field_modify_info modify_tcp[] = {
  *   Item is inner pattern.
  */
 static void
-flow_dv_translate_item_vlan(void *matcher, void *key,
+flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow,
+			    void *matcher, void *key,
 			    const struct rte_flow_item *item,
 			    int inner)
 {
@@ -3487,6 +3490,12 @@ struct field_modify_info modify_tcp[] = {
 		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
 					 outer_headers);
 		headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+		/*
+		 * This is workaround, masks are not supported,
+		 * and pre-validated.
+		 */
+		dev_flow->dv.vf_vlan.tag =
+			rte_be_to_cpu_16(vlan_v->tci) & 0x0fff;
 	}
 	tci_m = rte_be_to_cpu_16(vlan_m->tci);
 	tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci);
@@ -4995,7 +5004,8 @@ struct field_modify_info modify_tcp[] = {
 					     MLX5_FLOW_LAYER_OUTER_L2;
 			break;
 		case RTE_FLOW_ITEM_TYPE_VLAN:
-			flow_dv_translate_item_vlan(match_mask, match_value,
+			flow_dv_translate_item_vlan(dev_flow,
+						    match_mask, match_value,
 						    items, tunnel);
 			matcher.priority = MLX5_PRIORITY_MAP_L2;
 			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
@@ -5211,6 +5221,17 @@ struct field_modify_info modify_tcp[] = {
 					   "hardware refuses to create flow");
 			goto error;
 		}
+		if (priv->vmwa_context &&
+		    dev_flow->dv.vf_vlan.tag &&
+		    !dev_flow->dv.vf_vlan.created) {
+			/*
+			 * The rule contains the VLAN pattern.
+			 * For VF we are going to create VLAN
+			 * interface to make hypervisor set correct
+			 * e-Switch vport context.
+			 */
+			mlx5_vlan_vmwa_acquire(dev, &dev_flow->dv.vf_vlan);
+		}
 	}
 	return 0;
 error:
@@ -5224,6 +5245,9 @@ struct field_modify_info modify_tcp[] = {
 				mlx5_hrxq_release(dev, dv->hrxq);
 			dv->hrxq = NULL;
 		}
+		if (dev_flow->dv.vf_vlan.tag &&
+		    dev_flow->dv.vf_vlan.created)
+			mlx5_vlan_vmwa_release(dev, &dev_flow->dv.vf_vlan);
 	}
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
@@ -5424,6 +5448,9 @@ struct field_modify_info modify_tcp[] = {
 				mlx5_hrxq_release(dev, dv->hrxq);
 			dv->hrxq = NULL;
 		}
+		if (dev_flow->dv.vf_vlan.tag &&
+		    dev_flow->dv.vf_vlan.created)
+			mlx5_vlan_vmwa_release(dev, &dev_flow->dv.vf_vlan);
 	}
 }
 
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index fd6f2d5..c5b28e3 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -391,6 +391,9 @@
 		flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
 	else
 		flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
+	if (!tunnel)
+		dev_flow->verbs.vf_vlan.tag =
+			rte_be_to_cpu_16(spec->tci) & 0x0fff;
 }
 
 /**
@@ -1054,7 +1057,7 @@
 			break;
 		case RTE_FLOW_ITEM_TYPE_VLAN:
 			ret = mlx5_flow_validate_item_vlan(items, item_flags,
-							   error);
+							   dev, error);
 			if (ret < 0)
 				return ret;
 			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
@@ -1592,6 +1595,10 @@
 				mlx5_hrxq_release(dev, verbs->hrxq);
 			verbs->hrxq = NULL;
 		}
+		if (dev_flow->verbs.vf_vlan.tag &&
+		    dev_flow->verbs.vf_vlan.created) {
+			mlx5_vlan_vmwa_release(dev, &dev_flow->verbs.vf_vlan);
+		}
 	}
 }
 
@@ -1639,6 +1646,7 @@
 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 		 struct rte_flow_error *error)
 {
+	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_verbs *verbs;
 	struct mlx5_flow *dev_flow;
 	int err;
@@ -1688,6 +1696,17 @@
 					   "hardware refuses to create flow");
 			goto error;
 		}
+		if (priv->vmwa_context &&
+		    dev_flow->verbs.vf_vlan.tag &&
+		    !dev_flow->verbs.vf_vlan.created) {
+			/*
+			 * The rule contains the VLAN pattern.
+			 * For VF we are going to create VLAN
+			 * interface to make hypervisor set correct
+			 * e-Switch vport context.
+			 */
+			mlx5_vlan_vmwa_acquire(dev, &dev_flow->verbs.vf_vlan);
+		}
 	}
 	return 0;
 error:
@@ -1701,6 +1720,10 @@
 				mlx5_hrxq_release(dev, verbs->hrxq);
 			verbs->hrxq = NULL;
 		}
+		if (dev_flow->verbs.vf_vlan.tag &&
+		    dev_flow->verbs.vf_vlan.created) {
+			mlx5_vlan_vmwa_release(dev, &dev_flow->verbs.vf_vlan);
+		}
 	}
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c
index 5773fa7..f0f57de 100644
--- a/drivers/net/mlx5/mlx5_nl.c
+++ b/drivers/net/mlx5/mlx5_nl.c
@@ -12,11 +12,14 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <stdalign.h>
 #include <string.h>
 #include <sys/socket.h>
 #include <unistd.h>
 
 #include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_hypervisor.h>
 
 #include "mlx5.h"
 #include "mlx5_utils.h"
@@ -28,6 +31,8 @@
 /* Receive buffer size for the Netlink socket */
 #define MLX5_RECV_BUF_SIZE 32768
 
+/** Parameters of VLAN devices created by driver. */
+#define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx"
 /*
  * Define NDA_RTA as defined in iproute2 sources.
  *
@@ -987,3 +992,292 @@ struct mlx5_nl_ifindex_data {
 	}
 	return ret;
 }
+
+/*
+ * Delete VLAN network device by ifindex.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_vlan_vmwa_init().
+ * @param[in] ifindex
+ *   Interface index of network device to delete.
+ */
+static void
+mlx5_vlan_vmwa_delete(struct mlx5_vlan_vmwa_context *vmwa,
+		      uint32_t ifindex)
+{
+	int ret;
+	struct {
+		struct nlmsghdr nh;
+		struct ifinfomsg info;
+	} req = {
+		.nh = {
+			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+			.nlmsg_type = RTM_DELLINK,
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+		},
+		.info = {
+			.ifi_family = AF_UNSPEC,
+			.ifi_index = ifindex,
+		},
+	};
+
+	if (ifindex) {
+		++vmwa->nl_sn;
+		if (!vmwa->nl_sn)
+			++vmwa->nl_sn;
+		ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, vmwa->nl_sn);
+		if (ret >= 0)
+			ret = mlx5_nl_recv(vmwa->nl_socket,
+					   vmwa->nl_sn,
+					   NULL, NULL);
+		if (ret < 0)
+			DRV_LOG(WARNING, "netlink: error deleting"
+					 " VLAN WA ifindex %u, %d",
+					 ifindex, ret);
+	}
+}
+
+/* Set of subroutines to build Netlink message. */
+static struct nlattr *
+nl_msg_tail(struct nlmsghdr *nlh)
+{
+	return (struct nlattr *)
+		(((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len));
+}
+
+static void
+nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
+{
+	struct nlattr *nla = nl_msg_tail(nlh);
+
+	nla->nla_type = type;
+	nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr) + alen);
+	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + nla->nla_len;
+
+	if (alen)
+		memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen);
+}
+
+static struct nlattr *
+nl_attr_nest_start(struct nlmsghdr *nlh, int type)
+{
+	struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh);
+
+	nl_attr_put(nlh, type, NULL, 0);
+	return nest;
+}
+
+static void
+nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
+{
+	nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest;
+}
+
+/*
+ * Create network VLAN device with specified VLAN tag.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_vlan_vmwa_init().
+ * @param[in] ifindex
+ *   Base network interface index.
+ * @param[in] tag
+ *   VLAN tag for VLAN network device to create.
+ */
+static uint32_t
+mlx5_vlan_vmwa_create(struct mlx5_vlan_vmwa_context *vmwa,
+		      uint32_t ifindex,
+		      uint16_t tag)
+{
+	struct nlmsghdr *nlh;
+	struct ifinfomsg *ifm;
+	char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
+
+	alignas(RTE_CACHE_LINE_SIZE)
+	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		    NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
+		    NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
+		    NLMSG_ALIGN(sizeof(uint32_t)) +
+		    NLMSG_ALIGN(sizeof(name)) +
+		    NLMSG_ALIGN(sizeof("vlan")) +
+		    NLMSG_ALIGN(sizeof(uint32_t)) +
+		    NLMSG_ALIGN(sizeof(uint16_t)) + 16];
+	struct nlattr *na_info;
+	struct nlattr *na_vlan;
+	int ret;
+
+	memset(buf, 0, sizeof(buf));
+	++vmwa->nl_sn;
+	if (!vmwa->nl_sn)
+		++vmwa->nl_sn;
+	nlh = (struct nlmsghdr *)buf;
+	nlh->nlmsg_len = sizeof(struct nlmsghdr);
+	nlh->nlmsg_type = RTM_NEWLINK;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+			   NLM_F_EXCL | NLM_F_ACK;
+	ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
+	nlh->nlmsg_len += sizeof(struct ifinfomsg);
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->ifi_type = 0;
+	ifm->ifi_index = 0;
+	ifm->ifi_flags = IFF_UP;
+	ifm->ifi_change = 0xffffffff;
+	nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
+	ret = snprintf(name, sizeof(name), "%s.%u.%u",
+		       MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
+	nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
+	na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
+	nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
+	na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
+	nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
+	nl_attr_nest_end(nlh, na_vlan);
+	nl_attr_nest_end(nlh, na_info);
+	assert(sizeof(buf) >= nlh->nlmsg_len);
+	ret = mlx5_nl_send(vmwa->nl_socket, nlh, vmwa->nl_sn);
+	if (ret >= 0)
+		ret = mlx5_nl_recv(vmwa->nl_socket, vmwa->nl_sn, NULL, NULL);
+	if (ret < 0) {
+		DRV_LOG(WARNING,
+			"netlink: VLAN %s create failure (%d)",
+			name, ret);
+	}
+	// Try to get ifindex of created or pre-existing device.
+	ret = if_nametoindex(name);
+	if (!ret) {
+		DRV_LOG(WARNING,
+			"VLAN %s failed to get index (%d)",
+			name, errno);
+		return 0;
+	}
+	return ret;
+}
+
+/*
+ * Release VLAN network device, created for VM workaround.
+ *
+ * @param[in] dev
+ *   Ethernet device object, Netlink context provider.
+ * @param[in] vlan
+ *   Object representing the network device to release.
+ */
+void mlx5_vlan_vmwa_release(struct rte_eth_dev *dev,
+			    struct mlx5_vf_vlan *vlan)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_vlan_vmwa_context *vmwa = priv->vmwa_context;
+	struct mlx5_vlan_dev *vlan_dev = &vmwa->vlan_dev[0];
+
+	assert(vlan->created);
+	assert(priv->vmwa_context);
+	if (!vlan->created || !vmwa)
+		return;
+	vlan->created = 0;
+	assert(vlan_dev[vlan->tag].refcnt);
+	if (--vlan_dev[vlan->tag].refcnt == 0 &&
+	    vlan_dev[vlan->tag].ifindex) {
+		mlx5_vlan_vmwa_delete(vmwa, vlan_dev[vlan->tag].ifindex);
+		vlan_dev[vlan->tag].ifindex = 0;
+	}
+}
+
+/**
+ * Acquire VLAN interface with specified tag for VM workaround.
+ *
+ * @param[in] dev
+ *   Ethernet device object, Netlink context provider.
+ * @param[in] vlan
+ *   Object representing the network device to acquire.
+ */
+void mlx5_vlan_vmwa_acquire(struct rte_eth_dev *dev,
+			    struct mlx5_vf_vlan *vlan)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_vlan_vmwa_context *vmwa = priv->vmwa_context;
+	struct mlx5_vlan_dev *vlan_dev = &vmwa->vlan_dev[0];
+
+	assert(!vlan->created);
+	assert(priv->vmwa_context);
+	if (vlan->created || !vmwa)
+		return;
+	if (vlan_dev[vlan->tag].refcnt == 0) {
+		assert(!vlan_dev[vlan->tag].ifindex);
+		vlan_dev[vlan->tag].ifindex =
+			mlx5_vlan_vmwa_create(vmwa,
+					      vmwa->vf_ifindex,
+					      vlan->tag);
+	}
+	if (vlan_dev[vlan->tag].ifindex) {
+		vlan_dev[vlan->tag].refcnt++;
+		vlan->created = 1;
+	}
+}
+
+/*
+ * Create per ethernet device VLAN VM workaround context
+ */
+struct mlx5_vlan_vmwa_context *
+mlx5_vlan_vmwa_init(struct rte_eth_dev *dev,
+		    uint32_t ifindex)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_config *config = &priv->config;
+	struct mlx5_vlan_vmwa_context *vmwa;
+	enum rte_hypervisor hv_type;
+
+	/* Do not engage workaround over PF. */
+	if (!config->vf)
+		return NULL;
+	/* Check whether there is desired virtual environment */
+	hv_type = rte_hypervisor_get();
+	switch (hv_type) {
+	case RTE_HYPERVISOR_UNKNOWN:
+	case RTE_HYPERVISOR_VMWARE:
+		/*
+		 * The "white list" of configurations
+		 * to engage the workaround.
+		 */
+		break;
+	default:
+		/*
+		 * The configuration is not found in the "white list".
+		 * We should not engage the VLAN workaround.
+		 */
+		return NULL;
+	}
+	vmwa = rte_zmalloc(__func__, sizeof(*vmwa), sizeof(uint32_t));
+	if (!vmwa) {
+		DRV_LOG(WARNING,
+			"Can not allocate memory"
+			" for VLAN workaround context");
+		return NULL;
+	}
+	vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE);
+	if (vmwa->nl_socket < 0) {
+		DRV_LOG(WARNING,
+			"Can not create Netlink socket"
+			" for VLAN workaround context");
+		rte_free(vmwa);
+		return NULL;
+	}
+	vmwa->nl_sn = random();
+	vmwa->vf_ifindex = ifindex;
+	vmwa->dev = dev;
+	/* Cleanup for existing VLAN devices. */
+	return vmwa;
+}
+
+/*
+ * Destroy per ethernet device VLAN VM workaround context
+ */
+void mlx5_vlan_vmwa_exit(struct mlx5_vlan_vmwa_context *vmwa)
+{
+	unsigned int i;
+
+	/* Delete all remaining VLAN devices. */
+	for (i = 0; i < RTE_DIM(vmwa->vlan_dev); i++) {
+		if (vmwa->vlan_dev[i].ifindex)
+			mlx5_vlan_vmwa_delete(vmwa, vmwa->vlan_dev[i].ifindex);
+	}
+	if (vmwa->nl_socket >= 0)
+		close(vmwa->nl_socket);
+	rte_free(vmwa);
+}
-- 
1.8.3.1



More information about the dev mailing list