[dpdk-dev] [PATCH] nfp: avoid modulo operations for handling ring wrapping

Alejandro Lucero alejandro.lucero at netronome.com
Mon Dec 19 17:13:03 CET 2016


Having those modulo operations implies costly instructions execution,
what can be avoided with conditionals and unlikely clauses.

This change makes the software ring read and write indexes to be now
always within the ring size which has to be handled properly. The main
problem is when write pointer wraps and being less than the read pointer.
This happened before, but just with indexes type size (uint32_t) wrapping,
and in that case the processor does the right thing no requiring special
hanling by software.

This work has also led to discovering redundant pointers in the driver,
which have been removed.

Signed-off-by: Alejandro Lucero <alejandro.lucero at netronome.com>
---
 drivers/net/nfp/nfp_net.c     | 66 +++++++++++++++++++++----------------------
 drivers/net/nfp/nfp_net_pmd.h |  5 +---
 2 files changed, 33 insertions(+), 38 deletions(-)

diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index be0fefa..0e6bf4c 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -308,7 +308,6 @@ enum nfp_qcp_ptr {
 nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq)
 {
 	nfp_net_rx_queue_release_mbufs(rxq);
-	rxq->wr_p = 0;
 	rxq->rd_p = 0;
 	rxq->nb_rx_hold = 0;
 }
@@ -347,8 +346,6 @@ enum nfp_qcp_ptr {
 	nfp_net_tx_queue_release_mbufs(txq);
 	txq->wr_p = 0;
 	txq->rd_p = 0;
-	txq->tail = 0;
-	txq->qcp_rd_p = 0;
 }
 
 static int
@@ -1114,8 +1111,7 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 		return 0;
 	}
 
-	idx = rxq->rd_p % rxq->rx_count;
-	rxds = &rxq->rxds[idx];
+	idx = rxq->rd_p;
 
 	count = 0;
 
@@ -1414,8 +1410,6 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 		rxd->fld.dma_addr_lo = dma_addr & 0xffffffff;
 		rxe[i].mbuf = mbuf;
 		PMD_RX_LOG(DEBUG, "[%d]: %" PRIx64 "\n", i, dma_addr);
-
-		rxq->wr_p++;
 	}
 
 	/* Make sure all writes are flushed before telling the hardware */
@@ -1499,7 +1493,6 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 	}
 
 	txq->tx_count = nb_desc;
-	txq->tail = 0;
 	txq->tx_free_thresh = tx_free_thresh;
 	txq->tx_pthresh = tx_conf->tx_thresh.pthresh;
 	txq->tx_hthresh = tx_conf->tx_thresh.hthresh;
@@ -1691,7 +1684,6 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 	struct nfp_net_hw *hw;
 	struct rte_mbuf *mb;
 	struct rte_mbuf *new_mb;
-	int idx;
 	uint16_t nb_hold;
 	uint64_t dma_addr;
 	int avail;
@@ -1711,9 +1703,7 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 	nb_hold = 0;
 
 	while (avail < nb_pkts) {
-		idx = rxq->rd_p % rxq->rx_count;
-
-		rxb = &rxq->rxbufs[idx];
+		rxb = &rxq->rxbufs[rxq->rd_p];
 		if (unlikely(rxb == NULL)) {
 			RTE_LOG_DP(ERR, PMD, "rxb does not exist!\n");
 			break;
@@ -1725,7 +1715,7 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 		 */
 		rte_rmb();
 
-		rxds = &rxq->rxds[idx];
+		rxds = &rxq->rxds[rxq->rd_p];
 		if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
 			break;
 
@@ -1813,6 +1803,8 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 		rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
 
 		rxq->rd_p++;
+		if (unlikely(rxq->rd_p == rxq->rx_count)) /* wrapping?*/
+			rxq->rd_p = 0;
 	}
 
 	if (nb_hold == 0)
@@ -1858,33 +1850,40 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 	/* Work out how many packets have been sent */
 	qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
 
-	if (qcp_rd_p == txq->qcp_rd_p) {
+	if (qcp_rd_p == txq->rd_p) {
 		PMD_TX_LOG(DEBUG, "queue %u: It seems harrier is not sending "
 			   "packets (%u, %u)\n", txq->qidx,
-			   qcp_rd_p, txq->qcp_rd_p);
+			   qcp_rd_p, txq->rd_p);
 		return 0;
 	}
 
-	if (qcp_rd_p > txq->qcp_rd_p)
-		todo = qcp_rd_p - txq->qcp_rd_p;
+	if (qcp_rd_p > txq->rd_p)
+		todo = qcp_rd_p - txq->rd_p;
 	else
-		todo = qcp_rd_p + txq->tx_count - txq->qcp_rd_p;
+		todo = qcp_rd_p + txq->tx_count - txq->rd_p;
 
-	PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->qcp_rd_p: %u, qcp->rd_p: %u\n",
-		   qcp_rd_p, txq->qcp_rd_p, txq->rd_p);
+	PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u, qcp->rd_p: %u\n",
+		   qcp_rd_p, txq->rd_p, txq->rd_p);
 
 	if (todo == 0)
 		return todo;
 
-	txq->qcp_rd_p += todo;
-	txq->qcp_rd_p %= txq->tx_count;
 	txq->rd_p += todo;
+	if (unlikely(txq->rd_p >= txq->tx_count))
+		txq->rd_p -= txq->tx_count;
 
 	return todo;
 }
 
 /* Leaving always free descriptors for avoiding wrapping confusion */
-#define NFP_FREE_TX_DESC(t) (t->tx_count - (t->wr_p - t->rd_p) - 8)
+static inline
+uint32_t nfp_free_tx_desc(struct nfp_net_txq *txq)
+{
+	if (txq->wr_p >= txq->rd_p)
+		return txq->tx_count - (txq->wr_p - txq->rd_p) - 8;
+	else
+		return txq->rd_p - txq->wr_p - 8;
+}
 
 /*
  * nfp_net_txq_full - Check if the TX queue free descriptors
@@ -1895,9 +1894,9 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
  * This function uses the host copy* of read/write pointers
  */
 static inline
-int nfp_net_txq_full(struct nfp_net_txq *txq)
+uint32_t nfp_net_txq_full(struct nfp_net_txq *txq)
 {
-	return NFP_FREE_TX_DESC(txq) < txq->tx_free_thresh;
+	return (nfp_free_tx_desc(txq) < txq->tx_free_thresh);
 }
 
 static uint16_t
@@ -1915,15 +1914,15 @@ int nfp_net_txq_full(struct nfp_net_txq *txq)
 
 	txq = tx_queue;
 	hw = txq->hw;
-	txds = &txq->txds[txq->tail];
+	txds = &txq->txds[txq->wr_p];
 
 	PMD_TX_LOG(DEBUG, "working for queue %u at pos %d and %u packets\n",
-		   txq->qidx, txq->tail, nb_pkts);
+		   txq->qidx, txq->wr_p, nb_pkts);
 
-	if ((NFP_FREE_TX_DESC(txq) < nb_pkts) || (nfp_net_txq_full(txq)))
+	if ((nfp_free_tx_desc(txq) < nb_pkts) || (nfp_net_txq_full(txq)))
 		nfp_net_tx_free_bufs(txq);
 
-	free_descs = (uint16_t)NFP_FREE_TX_DESC(txq);
+	free_descs = (uint16_t)nfp_free_tx_desc(txq);
 	if (unlikely(free_descs == 0))
 		return 0;
 
@@ -1936,7 +1935,7 @@ int nfp_net_txq_full(struct nfp_net_txq *txq)
 	/* Sending packets */
 	while ((i < nb_pkts) && free_descs) {
 		/* Grabbing the mbuf linked to the current descriptor */
-		lmbuf = &txq->txbufs[txq->tail].mbuf;
+		lmbuf = &txq->txbufs[txq->wr_p].mbuf;
 		/* Warming the cache for releasing the mbuf later on */
 		RTE_MBUF_PREFETCH_TO_FREE(*lmbuf);
 
@@ -1998,9 +1997,8 @@ int nfp_net_txq_full(struct nfp_net_txq *txq)
 			free_descs--;
 
 			txq->wr_p++;
-			txq->tail++;
-			if (unlikely(txq->tail == txq->tx_count)) /* wrapping?*/
-				txq->tail = 0;
+			if (unlikely(txq->wr_p == txq->tx_count)) /* wrapping?*/
+				txq->wr_p = 0;
 
 			pkt_size -= dma_size;
 			if (!pkt_size) {
@@ -2011,7 +2009,7 @@ int nfp_net_txq_full(struct nfp_net_txq *txq)
 				pkt = pkt->next;
 			}
 			/* Referencing next free TX descriptor */
-			txds = &txq->txds[txq->tail];
+			txds = &txq->txds[txq->wr_p];
 			issued_descs++;
 		}
 		i++;
diff --git a/drivers/net/nfp/nfp_net_pmd.h b/drivers/net/nfp/nfp_net_pmd.h
index dc70d57..4df2275 100644
--- a/drivers/net/nfp/nfp_net_pmd.h
+++ b/drivers/net/nfp/nfp_net_pmd.h
@@ -216,12 +216,10 @@ struct nfp_net_txq {
 
 	uint32_t wr_p;
 	uint32_t rd_p;
-	uint32_t qcp_rd_p;
 
 	uint32_t tx_count;
 
 	uint32_t tx_free_thresh;
-	uint32_t tail;
 
 	/*
 	 * For each descriptor keep a reference to the mbuff and
@@ -240,7 +238,7 @@ struct nfp_net_txq {
 	struct nfp_net_tx_desc *txds;
 
 	/*
-	 * At this point 56 bytes have been used for all the fields in the
+	 * At this point 48 bytes have been used for all the fields in the
 	 * TX critical path. We have room for 8 bytes and still all placed
 	 * in a cache line. We are not using the threshold values below nor
 	 * the txq_flags but if we need to, we can add the most used in the
@@ -326,7 +324,6 @@ struct nfp_net_rxq {
 	 * freelist descriptors and @rd_p is where the driver start
 	 * reading descriptors for newly arrive packets from.
 	 */
-	uint32_t wr_p;
 	uint32_t rd_p;
 
 	/*
-- 
1.9.1



More information about the dev mailing list