[dpdk-stable] patch 'drivers/net: fix vector Rx comments' has been queued to stable release 20.11.4

Xueming Li xuemingl at nvidia.com
Wed Nov 10 07:28:08 CET 2021


Hi,

FYI, your patch has been queued to stable release 20.11.4

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 11/12/21. So please
shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(ie: not only metadata diffs), please double check that the rebase was
correctly done.

Queued patches are on a temporary branch at:
https://github.com/steevenlee/dpdk

This queued commit can be viewed at:
https://github.com/steevenlee/dpdk/commit/39a30eb8841d7177a1a61bc4a432a79c9e5e86e6

Thanks.

Xueming Li <xuemingl at nvidia.com>

---
>From 39a30eb8841d7177a1a61bc4a432a79c9e5e86e6 Mon Sep 17 00:00:00 2001
From: Feifei Wang <feifei.wang2 at arm.com>
Date: Fri, 23 Jul 2021 11:10:47 +0800
Subject: [PATCH] drivers/net: fix vector Rx comments
Cc: Xueming Li <xuemingl at nvidia.com>

[ upstream commit 4f76ac98b787dbc893d9aaae487f79e69cb962c4 ]

For the loop to process packets in Rx vector path, some notes for the
code are wrong, fix these errors.

Fixes: 7092be8437bd ("fm10k: add vector Rx")
Fixes: c3def6a8724c ("net/i40e: implement vector PMD for altivec")
Fixes: ae0eb310f253 ("net/i40e: implement vector PMD for ARM")
Fixes: 9ed94e5bb04e ("i40e: add vector Rx")
Fixes: 319c421f3890 ("net/avf: enable SSE Rx Tx")
Fixes: 1162f5a0ef31 ("net/iavf: support flexible Rx descriptor in SSE path")
Fixes: c68a52b8b38c ("net/ice: support vector SSE in Rx")
Fixes: cf4b4708a88a ("ixgbe: improve slow-path perf with vector scattered Rx")

Suggested-by: Ruifeng Wang <ruifeng.wang at arm.com>
Signed-off-by: Feifei Wang <feifei.wang2 at arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang at arm.com>
---
 drivers/net/fm10k/fm10k_rxtx_vec.c       | 4 ++--
 drivers/net/i40e/i40e_rxtx_vec_altivec.c | 8 ++++----
 drivers/net/i40e/i40e_rxtx_vec_neon.c    | 8 ++++----
 drivers/net/i40e/i40e_rxtx_vec_sse.c     | 4 ++--
 drivers/net/iavf/iavf_rxtx_vec_sse.c     | 8 ++++----
 drivers/net/ice/ice_rxtx_vec_sse.c       | 4 ++--
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c   | 4 ++--
 7 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c
index b3c31ff198..50e76b1083 100644
--- a/drivers/net/fm10k/fm10k_rxtx_vec.c
+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c
@@ -472,7 +472,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		mbp1 = _mm_loadu_si128((__m128i *)&mbufp[pos]);
 
 		/* Read desc statuses backwards to avoid race condition */
-		/* A.1 load 4 pkts desc */
+		/* A.1 load desc[3] */
 		descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
 		rte_compiler_barrier();
 
@@ -484,9 +484,9 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		mbp2 = _mm_loadu_si128((__m128i *)&mbufp[pos+2]);
 #endif
 
+		/* A.1 load desc[2-0] */
 		descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
 		rte_compiler_barrier();
-		/* B.1 load 2 mbuf point */
 		descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
 		rte_compiler_barrier();
 		descs0[0] = _mm_loadu_si128((__m128i *)(rxdp));
diff --git a/drivers/net/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
index 0f03d5e0fa..161870f505 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
@@ -281,22 +281,22 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 						  * in one XMM reg.
 						  */
 
-		/* B.1 load 1 mbuf point */
+		/* B.1 load 2 mbuf point */
 		mbp1 = *(vector unsigned long *)&sw_ring[pos];
 		/* Read desc statuses backwards to avoid race condition */
-		/* A.1 load 4 pkts desc */
+		/* A.1 load desc[3] */
 		descs[3] = *(vector unsigned long *)(rxdp + 3);
 		rte_compiler_barrier();
 
 		/* B.2 copy 2 mbuf point into rx_pkts  */
 		*(vector unsigned long *)&rx_pkts[pos] = mbp1;
 
-		/* B.1 load 1 mbuf point */
+		/* B.1 load 2 mbuf point */
 		mbp2 = *(vector unsigned long *)&sw_ring[pos + 2];
 
+		/* A.1 load desc[2-0] */
 		descs[2] = *(vector unsigned long *)(rxdp + 2);
 		rte_compiler_barrier();
-		/* B.1 load 2 mbuf point */
 		descs[1] = *(vector unsigned long *)(rxdp + 1);
 		rte_compiler_barrier();
 		descs[0] = *(vector unsigned long *)(rxdp);
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 278c523a8f..0df315b162 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -280,20 +280,20 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 
 		int32x4_t len_shl = {0, 0, 0, PKTLEN_SHIFT};
 
-		/* B.1 load 1 mbuf point */
+		/* B.1 load 2 mbuf point */
 		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
 		/* Read desc statuses backwards to avoid race condition */
-		/* A.1 load 4 pkts desc */
+		/* A.1 load desc[3] */
 		descs[3] =  vld1q_u64((uint64_t *)(rxdp + 3));
 
 		/* B.2 copy 2 mbuf point into rx_pkts  */
 		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
 
-		/* B.1 load 1 mbuf point */
+		/* B.1 load 2 mbuf point */
 		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
 
+		/* A.1 load desc[2-0] */
 		descs[2] =  vld1q_u64((uint64_t *)(rxdp + 2));
-		/* B.1 load 2 mbuf point */
 		descs[1] =  vld1q_u64((uint64_t *)(rxdp + 1));
 		descs[0] =  vld1q_u64((uint64_t *)(rxdp));
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index e9b4258bbc..8090a3a370 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -462,7 +462,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
 		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
 		/* Read desc statuses backwards to avoid race condition */
-		/* A.1 load 4 pkts desc */
+		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
 		rte_compiler_barrier();
 
@@ -474,9 +474,9 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
 #endif
 
+		/* A.1 load desc[2-0] */
 		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
 		rte_compiler_barrier();
-		/* B.1 load 2 mbuf point */
 		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
 		rte_compiler_barrier();
 		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 52919182b6..5d9a1bb809 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -494,7 +494,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
 		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
 		/* Read desc statuses backwards to avoid race condition */
-		/* A.1 load 4 pkts desc */
+		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
 		rte_compiler_barrier();
 
@@ -506,9 +506,9 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
 #endif
 
+		/* A.1 load desc[2-0] */
 		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
 		rte_compiler_barrier();
-		/* B.1 load 2 mbuf point */
 		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
 		rte_compiler_barrier();
 		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
@@ -755,7 +755,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
 		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
 		/* Read desc statuses backwards to avoid race condition */
-		/* A.1 load 4 pkts desc */
+		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
 		rte_compiler_barrier();
 
@@ -767,9 +767,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
 #endif
 
+		/* A.1 load desc[2-0] */
 		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
 		rte_compiler_barrier();
-		/* B.1 load 2 mbuf point */
 		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
 		rte_compiler_barrier();
 		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index fb0d1621a5..e624fd54b8 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -416,7 +416,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
 		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
 		/* Read desc statuses backwards to avoid race condition */
-		/* A.1 load 4 pkts desc */
+		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
 		rte_compiler_barrier();
 
@@ -428,9 +428,9 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
 #endif
 
+		/* A.1 load desc[2-0] */
 		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
 		rte_compiler_barrier();
-		/* B.1 load 2 mbuf point */
 		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
 		rte_compiler_barrier();
 		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
index cbd907f2b9..d0377410d6 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -454,7 +454,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
 
 		/* Read desc statuses backwards to avoid race condition */
-		/* A.1 load 4 pkts desc */
+		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
 		rte_compiler_barrier();
 
@@ -466,9 +466,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
 #endif
 
+		/* A.1 load desc[2-0] */
 		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
 		rte_compiler_barrier();
-		/* B.1 load 2 mbuf point */
 		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
 		rte_compiler_barrier();
 		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
-- 
2.33.0

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty:
---
--- -	2021-11-10 14:17:02.311513117 +0800
+++ 0004-drivers-net-fix-vector-Rx-comments.patch	2021-11-10 14:17:01.737413849 +0800
@@ -1 +1 @@
-From 4f76ac98b787dbc893d9aaae487f79e69cb962c4 Mon Sep 17 00:00:00 2001
+From 39a30eb8841d7177a1a61bc4a432a79c9e5e86e6 Mon Sep 17 00:00:00 2001
@@ -4,0 +5,3 @@
+Cc: Xueming Li <xuemingl at nvidia.com>
+
+[ upstream commit 4f76ac98b787dbc893d9aaae487f79e69cb962c4 ]
@@ -17 +19,0 @@
-Cc: stable at dpdk.org
@@ -33 +35 @@
-index cae5322d48..83af01dc2d 100644
+index b3c31ff198..50e76b1083 100644
@@ -57 +59 @@
-index edaa462ac8..b99323992f 100644
+index 0f03d5e0fa..161870f505 100644
@@ -88 +90 @@
-index 32336fdb80..fb624a4882 100644
+index 278c523a8f..0df315b162 100644
@@ -117 +119 @@
-index 03a0320353..b235502db5 100644
+index e9b4258bbc..8090a3a370 100644
@@ -141 +143 @@
-index b813d96ef4..ee1e905525 100644
+index 52919182b6..5d9a1bb809 100644
@@ -185 +187 @@
-index 5f7e13ee39..653bd28b41 100644
+index fb0d1621a5..e624fd54b8 100644
@@ -209 +211 @@
-index 3a3ef51172..1dea95e73b 100644
+index cbd907f2b9..d0377410d6 100644


More information about the stable mailing list