[dpdk-dev] [Patch 2/2] i40e rx Bulk Alloc: Larger list size (33 to 128) throughput optimization

Polehn, Mike A mike.a.polehn at intel.com
Tue Oct 27 21:56:40 CET 2015


Added a check that the packet allocation count (rx_free_thresh) is at least 2. This
eliminates the extra overhead of supporting prefetch for the case where only one packet
at a time is allocated into the queue: the prefetch pipeline primes two mbufs before
entering the fill loop, so it needs at least two entries.

Used native-width variables in some places to reduce the overhead of operating on
non-native variable sizes.

Added a second level of prefetch so the packet address is brought into cache earlier,
and eliminated the calculation inside the loop that determined the end of the prefetch
loop.
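
The two-stage rotation is easier to see outside the diff. Below is a minimal sketch,
not the driver code itself: it assumes DPDK's rte_prefetch0() and struct rte_mbuf,
while the rx_entry type, the fill_with_prefetch name, and the n parameter are
hypothetical stand-ins for i40e_rx_entry, i40e_rx_alloc_bufs(), and
rxq->rx_free_thresh.

	#include <rte_mbuf.h>
	#include <rte_prefetch.h>

	/* Hypothetical stand-in for struct i40e_rx_entry. */
	struct rx_entry {
		struct rte_mbuf *mbuf;
	};

	static void
	fill_with_prefetch(struct rx_entry *rxep, unsigned n)
	{
		struct rte_mbuf *pk, *npk;
		unsigned i, l;

		/* Prime a two-deep prefetch pipeline; requires n >= 2,
		 * hence the new I40E_RX_FREE_THRESH_MIN precondition. */
		pk = rxep->mbuf;
		rte_prefetch0(pk);
		rxep++;
		npk = rxep->mbuf;
		rte_prefetch0(npk);
		rxep++;
		l = n - 2;	/* prefetch-loop end, computed once */

		for (i = 0; i < n; i++) {
			struct rte_mbuf *mb = pk;

			pk = npk;
			if (i < l) {
				/* Prefetch the mbuf that will be used two
				 * iterations from now, so it is already in
				 * cache when it is written. */
				npk = rxep->mbuf;
				rte_prefetch0(npk);
				rxep++;
			}
			/* ... initialize mb and its descriptor here ... */
			(void)mb;
		}
	}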

Applied long-standing C optimization techniques: using pointer increments instead of
array indexing, and reducing the scope of some variables to improve the chances that
they are kept in registers rather than on the stack.
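
To illustrate the pointer-versus-array point in isolation (generic C, with a
hypothetical desc type that is not the driver's descriptor), compare the two loop
shapes below. The pointer walk replaces the per-iteration index scaling with a single
increment, and keeping temporaries such as dma_addr in the narrowest scope gives the
compiler the same register-allocation freedom.

	#include <stddef.h>
	#include <stdint.h>

	/* Hypothetical descriptor type, for illustration only. */
	struct desc {
		uint64_t pkt_addr;
	};

	/* Array-indexed form: each store recomputes base + i * sizeof(*d). */
	static void
	fill_indexed(struct desc *d, const uint64_t *addr, size_t n)
	{
		size_t i;

		for (i = 0; i < n; i++)
			d[i].pkt_addr = addr[i];
	}

	/* Pointer-walk form: one pointer increment per iteration, so d and
	 * addr are good candidates to stay in registers across the loop. */
	static void
	fill_walked(struct desc *d, const uint64_t *addr, size_t n)
	{
		const uint64_t *end = addr + n;

		while (addr < end)
			(d++)->pkt_addr = *addr++;
	}

A modern optimizer will often generate identical code for both forms, but the pointer
walk states the intent directly instead of relying on strength reduction.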

Signed-off-by: Mike A. Polehn <mike.a.polehn@intel.com>

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index ec62f75..2032e06 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -64,6 +64,7 @@
 #define DEFAULT_TX_FREE_THRESH 32
 #define I40E_MAX_PKT_TYPE      256
 #define I40E_RX_INPUT_BUF_MAX  256
+#define I40E_RX_FREE_THRESH_MIN  2
 
 #define I40E_TX_MAX_BURST  32
 
@@ -942,6 +943,12 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
 			     "rxq->rx_free_thresh=%d",
 			     rxq->nb_rx_desc, rxq->rx_free_thresh);
 		ret = -EINVAL;
+	} else if (rxq->rx_free_thresh < I40E_RX_FREE_THRESH_MIN) {
+		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
+				"rxq->rx_free_thresh=%d, "
+				"I40E_RX_FREE_THRESH_MIN=%d",
+				rxq->rx_free_thresh, I40E_RX_FREE_THRESH_MIN);
+		ret = -EINVAL;
 	} else if (!(rxq->nb_rx_desc < (I40E_MAX_RING_DESC -
 				RTE_PMD_I40E_RX_MAX_BURST))) {
 		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
@@ -1058,9 +1065,8 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
 	struct i40e_rx_entry *rxep;
-	struct rte_mbuf *mb;
-	unsigned alloc_idx, i;
-	uint64_t dma_addr;
+	struct rte_mbuf *pk, *npk;
+	unsigned alloc_idx, i, l;
 	int diag;
 
 	/* Allocate buffers in bulk */
@@ -1076,22 +1082,36 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 		return -ENOMEM;
 	}
 
+	pk = rxep->mbuf;
+	rte_prefetch0(pk);
+	rxep++;
+	npk = rxep->mbuf;
+	rte_prefetch0(npk);
+	rxep++;
+	l = rxq->rx_free_thresh - 2;
+
 	rxdp = &rxq->rx_ring[alloc_idx];
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
-		if (likely(i < (rxq->rx_free_thresh - 1)))
+		struct rte_mbuf *mb = pk;
+		pk = npk;
+		if (likely(i < l)) {
 			/* Prefetch next mbuf */
-			rte_prefetch0(rxep[i + 1].mbuf);
-
-		mb = rxep[i].mbuf;
-		rte_mbuf_refcnt_set(mb, 1);
-		mb->next = NULL;
+			npk = rxep->mbuf;
+			rte_prefetch0(npk);
+			rxep++;
+		}
 		mb->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_mbuf_refcnt_set(mb, 1);
 		mb->nb_segs = 1;
 		mb->port = rxq->port_id;
-		dma_addr = rte_cpu_to_le_64(\
-			RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb));
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
+		mb->next = NULL;
+		{
+			uint64_t dma_addr = rte_cpu_to_le_64(
+				RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb));
+			rxdp->read.hdr_addr = dma_addr;
+			rxdp->read.pkt_addr = dma_addr;
+		}
+		rxdp++;
 	}
 
 	rxq->rx_last_pos = alloc_idx + rxq->rx_free_thresh - 1;


