[dpdk-dev] [PATCH 1/6] net/mlx5: support tx swp tunnel offloading

Xueming Li xuemingl at mellanox.com
Tue Jan 9 15:11:05 CET 2018


This commit adds support for generic tunnel TSO and checksum offloads.
The PMD computes the inner/outer header offsets from the mbuf fields
and the hardware performs the TSO and checksum calculations according
to these offsets and types. This capability is supported only for PFs.
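
For illustration only (not part of this patch), a minimal sketch of the
mbuf setup an application would provide for a VXLAN-in-IPv4 TCP packet
so that the PMD can derive these offsets; the helper name and the
header lengths/MSS below are examples, not values taken from this series:

    #include <rte_mbuf.h>
    #include <rte_ether.h>
    #include <rte_ip.h>
    #include <rte_udp.h>
    #include <rte_tcp.h>

    /* Illustrative only: request tunnel TSO plus inner/outer checksums
     * on an mbuf whose data already holds all outer and inner headers.
     * PKT_TX_TCP_SEG implies PKT_TX_TCP_CKSUM, so the latter is omitted.
     */
    static void
    setup_tunnel_tso(struct rte_mbuf *m)
    {
        m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_TUNNEL_VXLAN |
                       PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IP_CKSUM |
                       PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
        m->outer_l2_len = sizeof(struct ether_hdr);
        m->outer_l3_len = sizeof(struct ipv4_hdr);
        /* l2_len covers the tunnel headers (UDP + VXLAN) plus inner L2. */
        m->l2_len = sizeof(struct udp_hdr) + 8 + sizeof(struct ether_hdr);
        m->l3_len = sizeof(struct ipv4_hdr);
        m->l4_len = sizeof(struct tcp_hdr);
        m->tso_segsz = 1400; /* example MSS */
    }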

Signed-off-by: Xueming Li <xuemingl at mellanox.com>

---
 drivers/net/mlx5/mlx5_prm.h  |  12 ++++
 drivers/net/mlx5/mlx5_rxtx.c | 163 ++++++++++++++++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx.h |  94 ++++++++++++++++++++-----
 drivers/net/mlx5/mlx5_txq.c  |   1 +
 4 files changed, 195 insertions(+), 75 deletions(-)
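
Note to reviewers (illustration, not part of the commit): a worked
example of the software parser fields produced by the new
txq_ol_flags_to_verbs() helper for the VXLAN case above, assuming no
VLAN and IPv4 outer/inner headers; offsets are stored in 2-byte units,
mirroring the ">> 1" in the helper, and the local names exist only for
this sketch:

    uint8_t offs[4] = {0};
    uint8_t types = 0;
    uint16_t off = 14;              /* outer_l2_len, no VLAN */

    offs[1] = off >> 1;             /* outer L3 offset: 7 */
    off += 20;                      /* outer_l3_len */
    offs[0] = off >> 1;             /* outer L4 (UDP) offset: 17 */
    types |= MLX5_ETH_OUTER_L4_UDP; /* defined in mlx5_prm.h by this patch */
    off += 8 + 8 + 14;              /* l2_len: UDP + VXLAN + inner L2 */
    offs[3] = off >> 1;             /* inner L3 offset: 32 */
    off += 20;                      /* l3_len */
    offs[2] = off >> 1;             /* inner L4 offset: 42 */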

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 2de310bcb..edf39c249 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -135,6 +135,18 @@
 /* Inner L4 checksum offload (Tunneled packets only). */
 #define MLX5_ETH_WQE_L4_INNER_CSUM (1u << 5)
 
+/* Outer L4 type is UDP. */
+#define MLX5_ETH_OUTER_L4_UDP  (1u << 5)
+
+/* Outer L3 type is IPV6. */
+#define MLX5_ETH_OUTER_L3_IPV6 (1u << 4)
+
+/* Inner L4 type is UDP. */
+#define MLX5_ETH_INNER_L4_UDP (1u << 1)
+
+/* Inner L3 type is IPV6. */
+#define MLX5_ETH_INNER_L3_IPV6 (1u << 0)
+
 /* Is flow mark valid. */
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #define MLX5_FLOW_MARK_IS_VALID(val) ((val) & 0xffffff00)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 3b8f71c28..d79f9fc0e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -247,6 +247,80 @@ mlx5_copy_to_wq(void *dst, const void *src, size_t n,
 }
 
 /**
+ * Inline TSO headers into the WQE and set the checksum fields.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param buf
+ *   Pointer to packet mbuf structure.
+ * @param raw
+ *   Double pointer to WQE current write offset.
+ * @param cs_flags
+ *   Pointer to the checksum flags.
+ * @param swp_offsets
+ *   Pointer to the header offsets when using the software parser.
+ * @param swp_types
+ *   Pointer to the header types when using the software parser.
+ * @param max_wqe
+ *   Pointer to the available number of WQEs.
+ *
+ * @return
+ *   Size of the headers copied into the WQE upon success,
+ *   a negative errno value otherwise. The following errors
+ *   are defined:
+ *
+ *   -EINVAL: invalid arguments for TSO; the packet headers are too
+ *   large or there are not enough WQEs, the TSO cannot be executed.
+ *
+ *   -ENOMEM: reached the end of the WQ ring; the TSO WQE can be
+ *   executed only after the WQ ring wraps around.
+ */
+static int
+process_tso(struct mlx5_txq_data *txq, struct rte_mbuf *buf, uint8_t **raw,
+	    uint16_t *max_wqe)
+{
+	uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
+	volatile struct mlx5_wqe *wqe = (volatile struct mlx5_wqe *)
+					 tx_mlx5_wqe(txq, txq->wqe_ci);
+	uint8_t *curr = *raw;
+	const uint8_t tunneled = txq->tunnel_en &&
+			       (buf->ol_flags & PKT_TX_TUNNEL_MASK);
+	uint16_t pkt_inline_sz = (uintptr_t)curr - (uintptr_t)wqe -
+				 (MLX5_WQE_DWORD_SIZE * 2 - 2);
+	uint8_t vlan_sz = (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
+	uintptr_t end = (uintptr_t)(((uintptr_t)txq->wqes) +
+				    (1 << txq->wqe_n) * MLX5_WQE_SIZE);
+	unsigned int copy_b;
+	uint16_t tso_header_sz;
+
+	if (vlan_sz)
+		addr += 2 * ETHER_ADDR_LEN + 2;
+	else
+		addr += pkt_inline_sz;
+	tso_header_sz = buf->l2_len + vlan_sz + buf->l3_len + buf->l4_len;
+	if (tunneled)
+		tso_header_sz += buf->outer_l2_len + buf->outer_l3_len;
+	if (unlikely(tso_header_sz > MLX5_MAX_TSO_HEADER)) {
+		txq->stats.oerrors++;
+		return -EINVAL;
+	}
+	copy_b = tso_header_sz - pkt_inline_sz;
+	if (copy_b && ((end - (uintptr_t)curr) > copy_b)) {
+		uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
+
+		if (unlikely(*max_wqe < n))
+			return -EINVAL;
+		*max_wqe -= n;
+		rte_memcpy((void *)curr, (void *)addr, copy_b);
+		/* Another DWORD will be added in the inline part. */
+		*raw = curr + MLX5_WQE_DS(copy_b) * MLX5_WQE_DWORD_SIZE;
+	} else {
+		return -ENOMEM;
+	}
+	return copy_b;
+}
+
+/**
  * DPDK callback to check the status of a tx descriptor.
  *
  * @param tx_queue
@@ -376,6 +450,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		uint16_t ehdr;
 		uint8_t cs_flags;
 		uint64_t tso = 0;
+		uint32_t swp_offsets = 0;
+		uint8_t swp_types = 0;
 		uint16_t tso_segsz = 0;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		uint32_t total_length = 0;
@@ -417,7 +493,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (pkts_n - i > 1)
 			rte_prefetch0(
 			    rte_pktmbuf_mtod(*(pkts + 1), volatile void *));
-		cs_flags = txq_ol_cksum_to_cs(txq, buf);
+		cs_flags = txq_ol_flags_to_verbs(txq, buf,
+						 (uint8_t *)&swp_offsets,
+						 &swp_types);
 		raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
 		/* Replace the Ethernet type by the VLAN if necessary. */
 		if (buf->ol_flags & PKT_TX_VLAN_PKT) {
@@ -445,69 +523,37 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		raw += MLX5_WQE_DWORD_SIZE;
 		tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
 		if (tso) {
-			uintptr_t end =
-				(uintptr_t)(((uintptr_t)txq->wqes) +
-					    (1 << txq->wqe_n) * MLX5_WQE_SIZE);
-			unsigned int copy_b;
-			uint8_t vlan_sz =
-				(buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
-			const uint64_t is_tunneled =
-				buf->ol_flags & (PKT_TX_TUNNEL_GRE |
-						 PKT_TX_TUNNEL_VXLAN);
-
-			tso_header_sz = buf->l2_len + vlan_sz +
-					buf->l3_len + buf->l4_len;
-			tso_segsz = buf->tso_segsz;
-			if (unlikely(tso_segsz == 0)) {
-				txq->stats.oerrors++;
-				break;
-			}
-			if (is_tunneled	&& txq->tunnel_en) {
-				tso_header_sz += buf->outer_l2_len +
-						 buf->outer_l3_len;
-				cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
-			} else {
-				cs_flags |= MLX5_ETH_WQE_L4_CSUM;
-			}
-			if (unlikely(tso_header_sz > MLX5_MAX_TSO_HEADER)) {
-				txq->stats.oerrors++;
-				break;
-			}
-			copy_b = tso_header_sz - pkt_inline_sz;
-			/* First seg must contain all headers. */
-			assert(copy_b <= length);
-			if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
-				uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
+			int ret;
 
-				if (unlikely(max_wqe < n))
-					break;
-				max_wqe -= n;
-				rte_memcpy((void *)raw, (void *)addr, copy_b);
-				addr += copy_b;
-				length -= copy_b;
-				/* Include padding for TSO header. */
-				copy_b = MLX5_WQE_DS(copy_b) *
-					 MLX5_WQE_DWORD_SIZE;
-				pkt_inline_sz += copy_b;
-				raw += copy_b;
-			} else {
+			ret = process_tso(txq, buf, &raw, &max_wqe);
+			if (ret == -EINVAL) {
+				break;
+			} else if (ret == -ENOMEM) {
 				/* NOP WQE. */
 				wqe->ctrl = (rte_v128u32_t){
-					rte_cpu_to_be_32(txq->wqe_ci << 8),
-					rte_cpu_to_be_32(txq->qp_num_8s | 1),
-					0,
-					0,
+						rte_cpu_to_be_32(txq->wqe_ci << 8),
+						rte_cpu_to_be_32(txq->qp_num_8s | 1),
+						0,
+						0,
 				};
 				ds = 1;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 				total_length = 0;
 #endif
 				k++;
 				goto next_wqe;
+			} else {
+				tso_segsz = buf->tso_segsz;
+				if (unlikely(tso_segsz == 0)) {
+					txq->stats.oerrors++;
+					break;
+				}
+				addr += ret;
+				length -= ret;
+				pkt_inline_sz += ret;
+				tso_header_sz = pkt_inline_sz;
 			}
 		}
 		/* Inline if enough room. */
-		if (max_inline || tso) {
+		if (max_inline || unlikely(tso)) {
 			uint32_t inl = 0;
 			uintptr_t end = (uintptr_t)
 				(((uintptr_t)txq->wqes) +
@@ -652,7 +698,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		++i;
 		j += sg;
 		/* Initialize known and common part of the WQE structure. */
-		if (tso) {
+		if (unlikely(tso)) {
 			wqe->ctrl = (rte_v128u32_t){
 				rte_cpu_to_be_32((txq->wqe_ci << 8) |
 						 MLX5_OPCODE_TSO),
@@ -661,8 +707,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				0,
 			};
 			wqe->eseg = (rte_v128u32_t){
-				0,
-				cs_flags | (rte_cpu_to_be_16(tso_segsz) << 16),
+				swp_offsets,
+				cs_flags | (swp_types << 8) |
+					(rte_cpu_to_be_16(tso_segsz) << 16),
 				0,
 				(ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
 			};
@@ -675,8 +722,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				0,
 			};
 			wqe->eseg = (rte_v128u32_t){
-				0,
-				cs_flags,
+				swp_offsets,
+				cs_flags | (swp_types << 8),
 				0,
 				(ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
 			};
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 4ade8bee1..852594708 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -196,6 +196,7 @@ struct mlx5_txq_data {
 	uint16_t tso_en:1; /* When set hardware TSO is enabled. */
 	uint16_t tunnel_en:1;
 	/* When set TX offload for tunneled packets are supported. */
+	uint16_t swp_en:1; /* When set software parser is supported. */
 	uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
 	uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
 	uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
@@ -623,40 +624,99 @@ mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
 }
 
 /**
- * Convert the Checksum offloads to Verbs.
+ * Convert mbuf Tx offload info to Verbs.
  *
  * @param txq_data
  *   Pointer to the Tx queue.
  * @param buf
  *   Pointer to the mbuf.
+ * @param offsets
+ *   Pointer to the header offsets.
+ * @param swp_types
+ *   Pointer to the swp types.
  *
  * @return
  *   the converted cs_flags.
  */
 static __rte_always_inline uint8_t
-txq_ol_cksum_to_cs(struct mlx5_txq_data *txq_data, struct rte_mbuf *buf)
+txq_ol_flags_to_verbs(struct mlx5_txq_data *txq_data, struct rte_mbuf *buf,
+		      uint8_t *offsets, uint8_t *swp_types)
 {
 	uint8_t cs_flags = 0;
-
-	/* Should we enable HW CKSUM offload */
-	if (buf->ol_flags &
-	    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
-		if (txq_data->tunnel_en &&
-		    (buf->ol_flags &
-		     (PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN))) {
-			cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
-				   MLX5_ETH_WQE_L4_INNER_CSUM;
-			if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
-				cs_flags |= MLX5_ETH_WQE_L3_CSUM;
-		} else {
-			cs_flags = MLX5_ETH_WQE_L3_CSUM |
-				   MLX5_ETH_WQE_L4_CSUM;
-		}
+	uint8_t vlan_sz = (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
+	const uint8_t tunnel = txq_data->tunnel_en &&
+			       (buf->ol_flags & PKT_TX_TUNNEL_MASK);
+	const uint8_t tso = txq_data->tso_en &&
+			    (buf->ol_flags & PKT_TX_TCP_SEG);
+	uint16_t off = buf->outer_l2_len + vlan_sz;
+
+	if (likely(!tso && !(buf->ol_flags &
+	    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM |
+	     PKT_TX_OUTER_IP_CKSUM))))
+		return cs_flags;
+	if (likely(!tunnel)) {
+		if (buf->ol_flags & PKT_TX_IP_CKSUM)
+			cs_flags = MLX5_ETH_WQE_L3_CSUM;
+		if (tso || (buf->ol_flags & PKT_TX_L4_MASK))
+			cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+		return cs_flags;
+	}
+	/* Tunneled packets */
+	if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+		cs_flags = MLX5_ETH_WQE_L3_CSUM;
+	if (buf->ol_flags & PKT_TX_IP_CKSUM)
+		cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+	if (tso || (buf->ol_flags & PKT_TX_L4_MASK))
+		cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
+	if (!txq_data->swp_en) /* HW offloading, only set csum flags. */
+		return cs_flags;
+	/* SW parser enabled. */
+	if (tso || (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)) {
+		offsets[1] = off >> 1; /* Outer L3 offset */
+		if (buf->ol_flags & PKT_TX_OUTER_IPV6)
+			*swp_types |= MLX5_ETH_OUTER_L3_IPV6;
+	}
+	off += buf->outer_l3_len;
+	/* TODO: is the outer L4 offset required? */
+	if (tso && (buf->ol_flags & PKT_TX_TUNNEL_VXLAN)) {
+		offsets[0] = off >> 1; /* Outer L4 offset */
+		*swp_types |= MLX5_ETH_OUTER_L4_UDP;
+	}
+	off += buf->l2_len;
+	if (tso || (buf->ol_flags & PKT_TX_IP_CKSUM)) {
+		offsets[3] = off >> 1; /* Inner L3 offset */
+		if (buf->ol_flags & PKT_TX_IPV6)
+			*swp_types |= MLX5_ETH_INNER_L3_IPV6;
+	}
+	if (tso || (buf->ol_flags & PKT_TX_L4_MASK)) {
+		off += buf->l3_len;
+		offsets[2] = off >> 1; /* Inner L4 offset */
+		if ((buf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
+			*swp_types |= MLX5_ETH_INNER_L4_UDP;
 	}
 	return cs_flags;
 }
 
 /**
+ * Convert the Checksum offloads to Verbs.
+ *
+ * @param txq_data
+ *   Pointer to the Tx queue.
+ * @param buf
+ *   Pointer to the mbuf.
+ *
+ * @return
+ *   the converted cs_flags.
+ */
+static __rte_always_inline uint8_t
+txq_ol_cksum_to_cs(struct mlx5_txq_data *txq_data, struct rte_mbuf *buf)
+{
+	uint32_t offsets;
+	uint8_t swp_types = 0; /* ORed into by txq_ol_flags_to_verbs(). */
+
+	return txq_ol_flags_to_verbs(txq_data, buf, (uint8_t *)&offsets,
+				     &swp_types);
+}
+
+/**
  * Count the number of contiguous single segment packets.
  *
  * @param pkts
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index b81c85fed..bd7ba0834 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -729,6 +729,7 @@ txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)
 		txq_ctrl->txq.tso_en = 1;
 	}
 	txq_ctrl->txq.tunnel_en = config->tunnel_en;
+	txq_ctrl->txq.swp_en = 1;
 }
 
 /**
-- 
2.13.3


