[dpdk-dev] [PATCH 5/9] mbuf: make rearm data address naturally aligned

Olivier Matz olivier.matz at 6wind.com
Wed Mar 8 10:41:57 CET 2017


From: Jerin Jacob <jerin.jacob at caviumnetworks.com>

To avoid multiple stores on the fast path, Ethernet drivers
aggregate the writes to data_off, refcnt, nb_segs and port
into a single uint64_t value and write it in one shot
through a uint64_t pointer to the &mbuf->rearm_data address.

Some non-IA platforms incur store overhead
if the store address is not naturally aligned. This patch
fixes the performance issue on those targets by moving buf_len
from in front of rearm_data so that the address is naturally aligned.

Signed-off-by: Jerin Jacob <jerin.jacob at caviumnetworks.com>
Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
---
 drivers/net/fm10k/fm10k_rxtx_vec.c                            | 3 ---
 drivers/net/i40e/i40e_rxtx_vec_sse.c                          | 5 +----
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c                       | 3 ---
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c                        | 3 ---
 lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h | 3 +--
 lib/librte_mbuf/rte_mbuf.h                                    | 6 +++---
 6 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c
index 825e3c1..61a65e9 100644
--- a/drivers/net/fm10k/fm10k_rxtx_vec.c
+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c
@@ -324,9 +324,6 @@ fm10k_rxq_rearm(struct fm10k_rx_queue *rxq)
 
 		/* Flush mbuf with pkt template.
 		 * Data to be rearmed is 6 bytes long.
-		 * Though, RX will overwrite ol_flags that are coming next
-		 * anyway. So overwrite whole 8 bytes with one load:
-		 * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
 		 */
 		p0 = (uintptr_t)&mb0->rearm_data;
 		*(uint64_t *)p0 = rxq->mbuf_initializer;
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index 2f861fd..e17235a 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -87,11 +87,8 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		mb0 = rxep[0].mbuf;
 		mb1 = rxep[1].mbuf;
 
-		 /* Flush mbuf with pkt template.
+		/* Flush mbuf with pkt template.
 		 * Data to be rearmed is 6 bytes long.
-		 * Though, RX will overwrite ol_flags that are coming next
-		 * anyway. So overwrite whole 8 bytes with one load:
-		 * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
 		 */
 		p0 = (uintptr_t)&mb0->rearm_data;
 		*(uint64_t *)p0 = rxq->mbuf_initializer;
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index 2c04161..bc8924f 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -85,9 +85,6 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 		/*
 		 * Flush mbuf with pkt template.
 		 * Data to be rearmed is 6 bytes long.
-		 * Though, RX will overwrite ol_flags that are coming next
-		 * anyway. So overwrite whole 8 bytes with one load:
-		 * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
 		 */
 		vst1_u8((uint8_t *)&mb0->rearm_data, p);
 		paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM;
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
index 65c5da3..62afe31 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -90,9 +90,6 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 		/*
 		 * Flush mbuf with pkt template.
 		 * Data to be rearmed is 6 bytes long.
-		 * Though, RX will overwrite ol_flags that are coming next
-		 * anyway. So overwrite whole 8 bytes with one load:
-		 * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
 		 */
 		p0 = (uintptr_t)&mb0->rearm_data;
 		*(uint64_t *)p0 = rxq->mbuf_initializer;
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 09713b0..f24f79f 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -116,11 +116,10 @@ struct rte_kni_fifo {
 struct rte_kni_mbuf {
 	void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
 	uint64_t buf_physaddr;
-	char pad0[2];
 	uint16_t data_off;      /**< Start address of data in segment buffer. */
 	char pad1[2];
 	uint8_t nb_segs;        /**< Number of segments. */
-	char pad4[1];
+	char pad4[3];
 	uint64_t ol_flags;      /**< Offload features. */
 	char pad2[4];
 	uint32_t pkt_len;       /**< Total pkt len: sum of all segment data_len. */
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index b4fe786..4dc9a20 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -393,10 +393,8 @@ struct rte_mbuf {
 	void *buf_addr;           /**< Virtual address of segment buffer. */
 	phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */
 
-	uint16_t buf_len;         /**< Length of segment buffer. */
-
 	/* next 6 bytes are initialised on RX descriptor rearm */
-	MARKER8 rearm_data;
+	MARKER64 rearm_data;
 	uint16_t data_off;
 
 	/**
@@ -414,6 +412,7 @@ struct rte_mbuf {
 	};
 	uint8_t nb_segs;          /**< Number of segments. */
 	uint8_t port;             /**< Input port. */
+	uint16_t pad;             /**< 2B pad for naturally aligned ol_flags */
 
 	uint64_t ol_flags;        /**< Offload features. */
 
@@ -474,6 +473,7 @@ struct rte_mbuf {
 	/** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
 	uint16_t vlan_tci_outer;
 
+	uint16_t buf_len;         /**< Length of segment buffer. */
 	/* second cache line - fields only used in slow path or on TX */
 	MARKER cacheline1 __rte_cache_min_aligned;
 
-- 
2.8.1



More information about the dev mailing list