[dpdk-dev] [PATCH v2] net/mlx5: support MPLS-in-GRE and MPLS-in-UDP

Matan Azrad matan at mellanox.com
Tue May 15 13:07:14 CEST 2018


Add support for the MPLS over GRE and MPLS over UDP tunnel types, as
described in the following RFCs:
1. https://tools.ietf.org/html/rfc4023
2. https://tools.ietf.org/html/rfc7510
3. https://tools.ietf.org/html/rfc4385

Signed-off-by: Matan Azrad <matan at mellanox.com>
---
 doc/guides/nics/mlx5.rst     |   4 +-
 drivers/net/mlx5/Makefile    |   5 ++
 drivers/net/mlx5/mlx5.c      |  13 ++++
 drivers/net/mlx5/mlx5.h      |   1 +
 drivers/net/mlx5/mlx5_flow.c | 161 +++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 176 insertions(+), 8 deletions(-)


V2:
Ignore void items between the GRE and MPLS tunnel items (Nelio's suggestion).


diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a7d5c90..2b110f4 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -74,7 +74,7 @@ Features
 - RX interrupts.
 - Statistics query including Basic, Extended and per queue.
 - Rx HW timestamp.
-- Tunnel types: VXLAN, L3 VXLAN, VXLAN-GPE, GRE.
+- Tunnel types: VXLAN, L3 VXLAN, VXLAN-GPE, GRE, MPLSoGRE, MPLSoUDP.
 - Tunnel HW offloads: packet type, inner/outer RSS, IP and UDP checksum verification.
 
 Limitations
@@ -113,6 +113,8 @@ Limitations
 
 - VXLAN TSO and checksum offloads are not supported on VM.
 
+- L3 VXLAN and VXLAN-GPE tunnels cannot be supported together with MPLSoGRE and MPLSoUDP.
+
 - VF: flow rules created on VF devices can only match traffic targeted at the
   configured MAC addresses (see ``rte_eth_dev_mac_addr_add()``).
 
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 8d64d4c..293144e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -108,6 +108,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		enum MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS \
 		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
+		HAVE_IBV_DEVICE_MPLS_SUPPORT \
+		infiniband/verbs.h \
+		enum IBV_FLOW_SPEC_MPLS \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
 		HAVE_IBV_WQ_FLAG_RX_END_PADDING \
 		infiniband/verbs.h \
 		enum IBV_WQ_FLAG_RX_END_PADDING \
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8aa91cc..225ebd4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -690,6 +690,7 @@
 	unsigned int mps;
 	unsigned int cqe_comp;
 	unsigned int tunnel_en = 0;
+	unsigned int mpls_en = 0;
 	unsigned int swp = 0;
 	unsigned int verb_priorities = 0;
 	unsigned int mprq = 0;
@@ -850,6 +851,17 @@
 	DRV_LOG(WARNING,
 		"tunnel offloading disabled due to old OFED/rdma-core version");
 #endif
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	mpls_en = ((attrs_out.tunnel_offloads_caps &
+		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) &&
+		   (attrs_out.tunnel_offloads_caps &
+		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP));
+	DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is %ssupported",
+		mpls_en ? "" : "not ");
+#else
+	DRV_LOG(WARNING, "MPLS over GRE/UDP tunnel offloading disabled due to"
+		" old OFED/rdma-core version or firmware configuration");
+#endif
 	err = mlx5_glue->query_device_ex(attr_ctx, NULL, &device_attr);
 	if (err) {
 		DEBUG("ibv_query_device_ex() failed");
@@ -873,6 +885,7 @@
 			.cqe_comp = cqe_comp,
 			.mps = mps,
 			.tunnel_en = tunnel_en,
+			.mpls_en = mpls_en,
 			.tx_vec_en = 1,
 			.rx_vec_en = 1,
 			.mpw_hdr_dseg = 0,
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c4c962b..7750832 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -93,6 +93,7 @@ struct mlx5_dev_config {
 	unsigned int mps:2; /* Multi-packet send supported mode. */
 	unsigned int tunnel_en:1;
 	/* Whether tunnel stateless offloads are supported. */
+	unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */
 	unsigned int flow_counter_en:1; /* Whether flow counter is supported. */
 	unsigned int cqe_comp:1; /* CQE compression is enabled. */
 	unsigned int tso:1; /* Whether TSO is supported. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 7af1dfa..3af9524 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -101,6 +101,11 @@ struct mlx5_flow_data {
 		     const void *default_mask,
 		     struct mlx5_flow_data *data);
 
+static int
+mlx5_flow_create_mpls(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      struct mlx5_flow_data *data);
+
 struct mlx5_flow_parse;
 
 static void
@@ -248,12 +253,14 @@ struct rte_flow {
 #define IS_TUNNEL(type) ( \
 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
-	(type) == RTE_FLOW_ITEM_TYPE_GRE)
+	(type) == RTE_FLOW_ITEM_TYPE_GRE || \
+	(type) == RTE_FLOW_ITEM_TYPE_MPLS)
 
 const uint32_t flow_ptype[] = {
 	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
 	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
+	[RTE_FLOW_ITEM_TYPE_MPLS] = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
 };
 
 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
@@ -264,6 +271,10 @@ struct rte_flow {
 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)]	= RTE_PTYPE_TUNNEL_VXLAN_GPE |
 						  RTE_PTYPE_L4_UDP,
 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
+	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)] =
+		RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
+	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)] =
+		RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
 };
 
 /** Structure to generate a simple graph of layers supported by the NIC. */
@@ -400,7 +411,8 @@ struct mlx5_flow_items {
 	},
 	[RTE_FLOW_ITEM_TYPE_UDP] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
-			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
+			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
+			       RTE_FLOW_ITEM_TYPE_MPLS),
 		.actions = valid_actions,
 		.mask = &(const struct rte_flow_item_udp){
 			.hdr = {
@@ -429,7 +441,8 @@ struct mlx5_flow_items {
 	[RTE_FLOW_ITEM_TYPE_GRE] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
 			       RTE_FLOW_ITEM_TYPE_IPV4,
-			       RTE_FLOW_ITEM_TYPE_IPV6),
+			       RTE_FLOW_ITEM_TYPE_IPV6,
+			       RTE_FLOW_ITEM_TYPE_MPLS),
 		.actions = valid_actions,
 		.mask = &(const struct rte_flow_item_gre){
 			.protocol = -1,
@@ -437,7 +450,26 @@ struct mlx5_flow_items {
 		.default_mask = &rte_flow_item_gre_mask,
 		.mask_sz = sizeof(struct rte_flow_item_gre),
 		.convert = mlx5_flow_create_gre,
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+		.dst_sz = sizeof(struct ibv_flow_spec_gre),
+#else
 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
+#endif
+	},
+	[RTE_FLOW_ITEM_TYPE_MPLS] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
+			       RTE_FLOW_ITEM_TYPE_IPV4,
+			       RTE_FLOW_ITEM_TYPE_IPV6),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_mpls){
+			.label_tc_s = "\xff\xff\xf0",
+		},
+		.default_mask = &rte_flow_item_mpls_mask,
+		.mask_sz = sizeof(struct rte_flow_item_mpls),
+		.convert = mlx5_flow_create_mpls,
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+		.dst_sz = sizeof(struct ibv_flow_spec_mpls),
+#endif
 	},
 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
@@ -865,6 +897,7 @@ struct ibv_spec_header {
 	struct priv *priv = dev->data->dev_private;
 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
 	unsigned int i;
+	unsigned int last_voids = 0;
 	int ret = 0;
 
 	/* Initialise the offsets to start after verbs attribute. */
@@ -874,8 +907,10 @@ struct ibv_spec_header {
 		const struct mlx5_flow_items *token = NULL;
 		unsigned int n;
 
-		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID) {
+			last_voids++;
 			continue;
+		}
 		for (i = 0;
 		     cur_item->items &&
 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
@@ -896,7 +931,10 @@ struct ibv_spec_header {
 		if (ret)
 			goto exit_item_not_supported;
 		if (IS_TUNNEL(items->type)) {
-			if (parser->tunnel) {
+			if (parser->tunnel &&
+			    !((items - last_voids - 1)->type ==
+			      RTE_FLOW_ITEM_TYPE_GRE && items->type ==
+			      RTE_FLOW_ITEM_TYPE_MPLS)) {
 				rte_flow_error_set(error, ENOTSUP,
 						   RTE_FLOW_ERROR_TYPE_ITEM,
 						   items,
@@ -904,6 +942,16 @@ struct ibv_spec_header {
 						   " tunnel encapsulations.");
 				return -rte_errno;
 			}
+			if (items->type == RTE_FLOW_ITEM_TYPE_MPLS &&
+			    !priv->config.mpls_en) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "MPLS not supported or"
+						   " disabled in firmware"
+						   " configuration.");
+				return -rte_errno;
+			}
 			if (!priv->config.tunnel_en &&
 			    parser->rss_conf.level > 1) {
 				rte_flow_error_set(error, ENOTSUP,
@@ -921,6 +969,7 @@ struct ibv_spec_header {
 			for (n = 0; n != hash_rxq_init_n; ++n)
 				parser->queue[n].offset += cur_item->dst_sz;
 		}
+		last_voids = 0;
 	}
 	if (parser->drop) {
 		parser->queue[HASH_RXQ_ETH].offset +=
@@ -1878,16 +1927,27 @@ struct ibv_spec_header {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
-		     const void *default_mask __rte_unused,
+mlx5_flow_create_gre(const struct rte_flow_item *item,
+		     const void *default_mask,
 		     struct mlx5_flow_data *data)
 {
 	struct mlx5_flow_parse *parser = data->parser;
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	(void)default_mask;
 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
 	struct ibv_flow_spec_tunnel tunnel = {
 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
 		.size = size,
 	};
+#else
+	const struct rte_flow_item_gre *spec = item->spec;
+	const struct rte_flow_item_gre *mask = item->mask;
+	unsigned int size = sizeof(struct ibv_flow_spec_gre);
+	struct ibv_flow_spec_gre tunnel = {
+		.type = parser->inner | IBV_FLOW_SPEC_GRE,
+		.size = size,
+	};
+#endif
 	struct ibv_flow_spec_ipv4_ext *ipv4;
 	struct ibv_flow_spec_ipv6 *ipv6;
 	unsigned int i;
@@ -1899,6 +1959,20 @@ struct ibv_spec_header {
 	/* Default GRE to inner RSS. */
 	if (!parser->rss_conf.level)
 		parser->rss_conf.level = 2;
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
+		tunnel.val.protocol = spec->protocol;
+		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
+		tunnel.mask.protocol = mask->protocol;
+		/* Remove unwanted bits from values. */
+		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
+		tunnel.val.protocol &= tunnel.mask.protocol;
+		tunnel.val.key &= tunnel.mask.key;
+	}
+#endif
 	/* Update encapsulation IP layer protocol. */
 	for (i = 0; i != hash_rxq_init_n; ++i) {
 		if (!parser->queue[i].ibv_attr)
@@ -1932,6 +2006,79 @@ struct ibv_spec_header {
 }
 
 /**
+ * Convert MPLS item to Verbs specification.
+ * MPLS tunnel types currently supported are MPLS-in-GRE and MPLS-in-UDP.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_create_mpls(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      struct mlx5_flow_data *data)
+{
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	(void)default_mask;
+	return rte_flow_error_set(data->error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_ITEM,
+				  item,
+				  "MPLS is not supported by driver");
+#else
+	const struct rte_flow_item_mpls *spec = item->spec;
+	const struct rte_flow_item_mpls *mask = item->mask;
+	struct mlx5_flow_parse *parser = data->parser;
+	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
+	struct ibv_flow_spec_mpls mpls = {
+		.type = IBV_FLOW_SPEC_MPLS,
+		.size = size,
+	};
+
+	parser->inner = IBV_FLOW_SPEC_INNER;
+	if (parser->layer == HASH_RXQ_UDPV4 ||
+	    parser->layer == HASH_RXQ_UDPV6) {
+		parser->tunnel =
+			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)];
+		parser->out_layer = parser->layer;
+	} else {
+		parser->tunnel =
+			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)];
+		/* parser->out_layer stays as in GRE out_layer. */
+	}
+	parser->layer = HASH_RXQ_TUNNEL;
+	/*
+	 * For MPLS-in-GRE, RSS level should have been set.
+	 * For MPLS-in-UDP, use outer RSS.
+	 */
+	if (!parser->rss_conf.level)
+		parser->rss_conf.level = 1;
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		/*
+		 * The verbs label field includes the entire MPLS header:
+		 * bits 0:19 - label value field.
+		 * bits 20:22 - traffic class field.
+		 * bits 23 - bottom of stack bit.
+		 * bits 24:31 - ttl field.
+		 */
+		mpls.val.label = *(const uint32_t *)spec;
+		mpls.mask.label = *(const uint32_t *)mask;
+		/* Remove unwanted bits from values. */
+		mpls.val.label &= mpls.mask.label;
+	}
+	mlx5_flow_create_copy(parser, &mpls, size);
+	return 0;
+#endif
+}
+
+/**
  * Convert mark/flag action to Verbs specification.
  *
  * @param parser
-- 
1.9.5



More information about the dev mailing list