[dpdk-dev] [PATCH 07/12] mbuf: generic support for TCP segmentation offload

Olivier Matz olivier.matz at 6wind.com
Mon Nov 10 16:59:21 CET 2014


Some of the NICs supported by DPDK can accelerate TCP traffic by
performing segmentation offload: the application prepares a packet of
up to 64K with a valid TCP header and delegates its segmentation to
the NIC.

Implement the generic part of TCP segmentation offload in rte_mbuf. It
introduces two new fields in rte_mbuf: l4_len (the length of the L4
header in bytes) and tso_segsz (the MSS of the packets).

To delegate the TCP segmentation to the hardware, the user has to (see
the sketch after this list):

- set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
  PKT_TX_TCP_CKSUM)
- set PKT_TX_IP_CKSUM if it's IPv4, and set the IP checksum field to 0
  in the packet
- fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
- calculate the pseudo header checksum and set it in the TCP header,
  as required when doing hardware TCP checksum offload
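
An example for an Ethernet/IPv4/TCP packet is sketched below. It is
only illustrative: the function name prepare_tso_pkt() and the MSS
value of 1460 are arbitrary, and it assumes a pseudo header checksum
helper like rte_ipv4_phdr_cksum() from rte_ip.h (introduced in this
patch series) is available:

  #include <rte_mbuf.h>
  #include <rte_ether.h>
  #include <rte_ip.h>
  #include <rte_tcp.h>

  /* sketch: prepare a contiguous Ethernet/IPv4/TCP mbuf for TSO */
  static void prepare_tso_pkt(struct rte_mbuf *m)
  {
          struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
          struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
          struct tcp_hdr *tcp = (struct tcp_hdr *)
                  ((char *)ip + sizeof(struct ipv4_hdr));

          /* PKT_TX_TCP_SEG implies PKT_TX_TCP_CKSUM; add
           * PKT_TX_IP_CKSUM because the packet is IPv4 */
          m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IP_CKSUM;

          /* fill the mbuf offload information */
          m->l2_len = sizeof(struct ether_hdr);
          m->l3_len = sizeof(struct ipv4_hdr);
          m->l4_len = (tcp->data_off & 0xf0) >> 2; /* TCP hdr bytes */
          m->tso_segsz = 1460; /* example MSS */

          /* the hardware fills in the IP checksum */
          ip->hdr_checksum = 0;

          /* seed the TCP checksum with the pseudo header checksum;
           * for TSO it must not include the payload length, which is
           * why the flags are passed to the helper */
          tcp->cksum = rte_ipv4_phdr_cksum(ip, m->ol_flags);
  }

Note that tcp->cksum receives only the pseudo header sum, not a
complete checksum: the NIC finalizes the TCP checksum of each segment
it generates.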

The API is inspired by the ixgbe hardware (the next commit adds ixgbe
support), but it seems generic enough to be used by other hardware and
drivers in the future.

This commit also reworks the way l2_len and l3_len are used in the igb
and ixgbe drivers, as the combined l2_l3_len field is no longer
available in the mbuf.

Signed-off-by: Mirek Walukiewicz <miroslaw.walukiewicz at intel.com>
Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
---
 app/test-pmd/testpmd.c            |  3 ++-
 examples/ipv4_multicast/main.c    |  3 ++-
 lib/librte_mbuf/rte_mbuf.h        | 44 +++++++++++++++++++++++----------------
 lib/librte_pmd_e1000/igb_rxtx.c   | 11 +++++++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +++++++++-
 5 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 12adafa..a831e31 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -408,7 +408,8 @@ testpmd_mbuf_ctor(struct rte_mempool *mp,
 	mb->ol_flags     = 0;
 	mb->data_off     = RTE_PKTMBUF_HEADROOM;
 	mb->nb_segs      = 1;
-	mb->l2_l3_len       = 0;
+	mb->l2_len       = 0;
+	mb->l3_len       = 0;
 	mb->vlan_tci     = 0;
 	mb->hash.rss     = 0;
 }
diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c
index de5e6be..a31d43d 100644
--- a/examples/ipv4_multicast/main.c
+++ b/examples/ipv4_multicast/main.c
@@ -302,7 +302,8 @@ mcast_out_pkt(struct rte_mbuf *pkt, int use_clone)
 	/* copy metadata from source packet*/
 	hdr->port = pkt->port;
 	hdr->vlan_tci = pkt->vlan_tci;
-	hdr->l2_l3_len = pkt->l2_l3_len;
+	hdr->l2_len = pkt->l2_len;
+	hdr->l3_len = pkt->l3_len;
 	hdr->hash = pkt->hash;
 
 	hdr->ol_flags = pkt->ol_flags;
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index bcd8996..f76b768 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -126,6 +126,19 @@ extern "C" {
 
 #define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN computed by NIC */
 
+/**
+ * TCP segmentation offload. To enable this offload feature for a
+ * packet to be transmitted on hardware supporting TSO:
+ *  - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
+ *    PKT_TX_TCP_CKSUM)
+ *  - if it's IPv4, set the PKT_TX_IP_CKSUM flag and set the IP
+ *    checksum field to 0 in the packet
+ *  - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
+ *  - calculate the pseudo header checksum and set it in the TCP header,
+ *    as required when doing hardware TCP checksum offload
+ */
+#define PKT_TX_TCP_SEG       (1ULL << 49)
+
 /* Use final bit of flags to indicate a control mbuf */
 #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
 
@@ -185,6 +198,7 @@ static inline const char *rte_get_tx_ol_flag_name(uint64_t mask)
 	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
 	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
 	case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
+	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
 	default: return NULL;
 	}
 }
@@ -264,22 +278,18 @@ struct rte_mbuf {
 
 	/* fields to support TX offloads */
 	union {
-		uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */
+		uint64_t tx_offload;       /**< combined for easy fetch */
 		struct {
-			uint16_t l3_len:9;      /**< L3 (IP) Header Length. */
-			uint16_t l2_len:7;      /**< L2 (MAC) Header Length. */
-		};
-	};
+			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+			uint64_t tso_segsz:16; /**< TCP TSO segment size */
 
-	/* fields for TX offloading of tunnels */
-	union {
-		uint16_t inner_l2_l3_len;
-		/**< combined inner l2/l3 lengths as single var */
-		struct {
-			uint16_t inner_l3_len:9;
-			/**< inner L3 (IP) Header Length. */
-			uint16_t inner_l2_len:7;
-			/**< inner L2 (MAC) Header Length. */
+			/* fields for TX offloading of tunnels */
+			uint64_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length. */
+			uint64_t inner_l2_len:7; /**< inner L2 (MAC) Hdr Length. */
+
+			/* uint64_t unused:8; */
 		};
 	};
 } __rte_cache_aligned;
@@ -631,8 +641,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
 {
 	m->next = NULL;
 	m->pkt_len = 0;
-	m->l2_l3_len = 0;
-	m->inner_l2_l3_len = 0;
+	m->tx_offload = 0;
 	m->vlan_tci = 0;
 	m->nb_segs = 1;
 	m->port = 0xff;
@@ -701,8 +710,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md)
 	mi->data_len = md->data_len;
 	mi->port = md->port;
 	mi->vlan_tci = md->vlan_tci;
-	mi->l2_l3_len = md->l2_l3_len;
-	mi->inner_l2_l3_len = md->inner_l2_l3_len;
+	mi->tx_offload = md->tx_offload;
 	mi->hash = md->hash;
 
 	mi->next = NULL;
diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
index dbf5074..0a9447e 100644
--- a/lib/librte_pmd_e1000/igb_rxtx.c
+++ b/lib/librte_pmd_e1000/igb_rxtx.c
@@ -361,6 +361,13 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf     *tx_pkt;
 	struct rte_mbuf     *m_seg;
 	union igb_vlan_macip vlan_macip_lens;
+	union {
+		uint16_t u16;
+		struct {
+			uint16_t l3_len:9;
+			uint16_t l2_len:7;
+		};
+	} l2_l3_len;
 	uint64_t buf_dma_addr;
 	uint32_t olinfo_status;
 	uint32_t cmd_type_len;
@@ -398,8 +405,10 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
 
 		ol_flags = tx_pkt->ol_flags;
+		l2_l3_len.l2_len = tx_pkt->l2_len;
+		l2_l3_len.l3_len = tx_pkt->l3_len;
 		vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
-		vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
+		vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
 		tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM |
 			PKT_TX_L4_MASK);
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 70ca254..54a0fc1 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -540,6 +540,13 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf     *tx_pkt;
 	struct rte_mbuf     *m_seg;
 	union ixgbe_vlan_macip vlan_macip_lens;
+	union {
+		uint16_t u16;
+		struct {
+			uint16_t l3_len:9;
+			uint16_t l2_len:7;
+		};
+	} l2_l3_len;
 	uint64_t buf_dma_addr;
 	uint32_t olinfo_status;
 	uint32_t cmd_type_len;
@@ -583,8 +590,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM |
 			PKT_TX_L4_MASK);
 		if (tx_ol_req) {
+			l2_l3_len.l2_len = tx_pkt->l2_len;
+			l2_l3_len.l3_len = tx_pkt->l3_len;
 			vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
-			vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
+			vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
 
 			/* If new context need be built or reuse the exist ctx. */
 			ctx = what_advctx_update(txq, tx_ol_req,
-- 
2.1.0


