[dpdk-dev] [PATCH] ixgbe: avoid unnecessary break when checking at the tail of rx hwring

Jianbo Liu jianbo.liu at linaro.org
Mon Mar 14 15:25:58 CET 2016


When checking the rx ring queue, it's possible that the loop breaks at the
tail of the hardware ring while there are still packets waiting at the head
of the queue.
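To illustrate the idea outside the vector code, below is a minimal standalone
sketch (plain C, not the driver code); the ring size, the done[] array and the
recv_burst() name are made up for the example. It processes descriptors in
groups of up to four and, when a group ends exactly at the ring boundary,
wraps back to index 0 and keeps receiving instead of breaking:

#include <stdint.h>
#include <stdio.h>

#define DESCS_PER_LOOP 4
#define RING_SIZE      16          /* illustrative power-of-two ring size */

static int done[RING_SIZE];        /* 1 = descriptor has a packet ready */

static uint16_t
recv_burst(uint16_t *tail, uint16_t nb_pkts)
{
	uint16_t recd = 0;
	uint16_t pos = *tail;

	while (recd < nb_pkts) {
		uint16_t limit = DESCS_PER_LOOP;
		uint16_t var = 0;

		/* a group must not read past the end of the ring */
		if (pos + limit > RING_SIZE)
			limit = RING_SIZE - pos;

		/* count ready descriptors in this group */
		while (var < limit && done[pos + var])
			var++;

		if (var == 0)
			break;

		recd += var;
		pos += var;
		if (pos == RING_SIZE)
			pos = 0;           /* wrap instead of breaking */
		else if (var < DESCS_PER_LOOP)
			break;             /* partial group, no more packets */
	}

	*tail = pos;
	return recd;
}

int
main(void)
{
	uint16_t tail = 14;
	uint16_t n;

	/* ready packets at 14, 15 and 0..3: a loop that stops at the
	 * ring end would return only 2 of the 6 */
	done[14] = done[15] = 1;
	done[0] = done[1] = done[2] = done[3] = 1;

	n = recv_burst(&tail, 32);
	printf("received %u packets, tail is now %u\n",
	       (unsigned)n, (unsigned)tail);
	return 0;
}

The patch below applies the same wrap-around continuation to the vectorized
receive loop.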

Signed-off-by: Jianbo Liu <jianbo.liu at linaro.org>
---
 drivers/net/ixgbe/ixgbe_rxtx_vec.c | 68 +++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
index ccd93c7..611e431 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
@@ -206,10 +206,9 @@ static inline uint16_t
 _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
-	volatile union ixgbe_adv_rx_desc *rxdp;
+	volatile union ixgbe_adv_rx_desc *rxdp, *rxdp_end;
 	struct ixgbe_rx_entry *sw_ring;
-	uint16_t nb_pkts_recd;
-	int pos;
+	uint16_t rev;
 	uint64_t var;
 	__m128i shuf_msk;
 	__m128i crc_adjust = _mm_set_epi16(
@@ -232,6 +231,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles */
 	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp_end = rxq->rx_ring + rxq->nb_rx_desc;
 
 	_mm_prefetch((const void *)rxdp, _MM_HINT_T0);
 
@@ -275,9 +275,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 * [C*. extract the end-of-packet bit, if requested]
 	 * D. fill info. from desc to mbuf
 	 */
-	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
-			pos += RTE_IXGBE_DESCS_PER_LOOP,
-			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
+	for (rev = 0; rev < nb_pkts; ) {
 		__m128i descs0[RTE_IXGBE_DESCS_PER_LOOP];
 		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
 		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
@@ -285,17 +283,17 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		__m128i mbp1, mbp2; /* two mbuf pointer in one XMM reg. */
 
 		/* B.1 load 1 mbuf point */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[0]);
 
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load 4 pkts desc */
 		descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
 
 		/* B.2 copy 2 mbuf point into rx_pkts  */
-		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+		_mm_storeu_si128((__m128i *)&rx_pkts[rev], mbp1);
 
 		/* B.1 load 1 mbuf point */
-		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
+		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[2]);
 
 		descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
 		/* B.1 load 2 mbuf point */
@@ -303,13 +301,13 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		descs0[0] = _mm_loadu_si128((__m128i *)(rxdp));
 
 		/* B.2 copy 2 mbuf point into rx_pkts  */
-		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
+		_mm_storeu_si128((__m128i *)&rx_pkts[rev + 2], mbp2);
 
 		if (split_packet) {
-			rte_prefetch0(&rx_pkts[pos]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+			rte_prefetch0(&rx_pkts[rev]->cacheline1);
+			rte_prefetch0(&rx_pkts[rev + 1]->cacheline1);
+			rte_prefetch0(&rx_pkts[rev + 2]->cacheline1);
+			rte_prefetch0(&rx_pkts[rev + 3]->cacheline1);
 		}
 
 		/* A* mask out 0~3 bits RSS type */
@@ -333,7 +331,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
 
 		/* set ol_flags with vlan packet type */
-		desc_to_olflags_v(descs0, &rx_pkts[pos]);
+		desc_to_olflags_v(descs0, &rx_pkts[rev]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
 		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
@@ -348,9 +346,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
 
 		/* D.3 copy final 3,4 data to rx_pkts */
-		_mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
+		_mm_storeu_si128((void *)&rx_pkts[rev+3]->rx_descriptor_fields1,
 				pkt_mb4);
-		_mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
+		_mm_storeu_si128((void *)&rx_pkts[rev+2]->rx_descriptor_fields1,
 				pkt_mb3);
 
 		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
@@ -375,13 +373,12 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
 			/* store the resulting 32-bit value */
 			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
-			split_packet += RTE_IXGBE_DESCS_PER_LOOP;
 
 			/* zero-out next pointers */
-			rx_pkts[pos]->next = NULL;
-			rx_pkts[pos + 1]->next = NULL;
-			rx_pkts[pos + 2]->next = NULL;
-			rx_pkts[pos + 3]->next = NULL;
+			rx_pkts[rev]->next = NULL;
+			rx_pkts[rev + 1]->next = NULL;
+			rx_pkts[rev + 2]->next = NULL;
+			rx_pkts[rev + 3]->next = NULL;
 		}
 
 		/* C.3 calc available number of desc */
@@ -389,24 +386,35 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		staterr = _mm_packs_epi32(staterr, zero);
 
 		/* D.3 copy final 1,2 data to rx_pkts */
-		_mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
+		_mm_storeu_si128((void *)&rx_pkts[rev+1]->rx_descriptor_fields1,
 				pkt_mb2);
-		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+		_mm_storeu_si128((void *)&rx_pkts[rev]->rx_descriptor_fields1,
 				pkt_mb1);
 
 		/* C.4 calc avaialbe number of desc */
 		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
-		nb_pkts_recd += var;
-		if (likely(var != RTE_IXGBE_DESCS_PER_LOOP))
+		if (unlikely(var == 0))
 			break;
+		else {
+			if (split_packet)
+				 split_packet += var;
+
+			rev += var;
+			sw_ring += var;
+			rxdp += var;
+			if (rxdp == rxdp_end) {
+				sw_ring = rxq->sw_ring;
+				rxdp = rxq->rx_ring;
+			} else if (var < RTE_IXGBE_DESCS_PER_LOOP)
+				break;
+		}
 	}
 
 	/* Update our internal tail pointer */
-	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
-	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
-	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+	rxq->rx_tail = rxdp - rxq->rx_ring;
+	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + rev);
 
-	return nb_pkts_recd;
+	return rev;
 }
 
 /*
-- 
1.8.3.1


