[dpdk-dev] [PATCH v5 4/5] bnx2x: use single doorbell for TX

Rasesh Mody rasesh.mody at qlogic.com
Thu May 12 02:06:24 CEST 2016


Change the Tx routine to ring the doorbell once per burst
instead of once per Tx packet. This driver-level optimization
is necessary to achieve line rate for larger frame
sizes (1k or more).

Signed-off-by: Rasesh Mody <rasesh.mody at qlogic.com>
Signed-off-by: Harish Patil <harish.patil at qlogic.com>
---
 drivers/net/bnx2x/bnx2x.c      |  200 +++++++++++++++++++---------------------
 drivers/net/bnx2x/bnx2x.h      |    3 +-
 drivers/net/bnx2x/bnx2x_rxtx.c |   23 +++--
 3 files changed, 107 insertions(+), 119 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index 5d182c3..f7ee77e 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -1293,7 +1293,7 @@ bnx2x_free_tx_pkt(__rte_unused struct bnx2x_fastpath *fp, struct bnx2x_tx_queue
 	struct rte_mbuf *tx_mbuf = txq->sw_ring[TX_BD(pkt_idx, txq)];
 
 	if (likely(tx_mbuf != NULL)) {
-		rte_pktmbuf_free(tx_mbuf);
+		rte_pktmbuf_free_seg(tx_mbuf);
 	} else {
 		PMD_RX_LOG(ERR, "fp[%02d] lost mbuf %lu",
 			   fp->index, (unsigned long)TX_BD(pkt_idx, txq));
@@ -2113,141 +2113,127 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link
  * the mbuf and return to the caller.
  *
  * Returns:
- *   void.
+ *     int: Number of TX BDs used for the mbuf
  *
  *   Note the side effect that an mbuf may be freed if it causes a problem.
  */
-void bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf **m_head,
-		    int m_pkts)
+int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0)
 {
-	struct rte_mbuf *m0;
 	struct eth_tx_start_bd *tx_start_bd;
 	uint16_t bd_prod, pkt_prod;
-	int m_tx;
 	struct bnx2x_softc *sc;
 	uint32_t nbds = 0;
-	struct bnx2x_fastpath *fp;
 
 	sc = txq->sc;
-	fp = &sc->fp[txq->queue_id];
-
 	bd_prod = txq->tx_bd_tail;
 	pkt_prod = txq->tx_pkt_tail;
 
-	for (m_tx = 0; m_tx < m_pkts; m_tx++) {
-
-		m0 = *m_head++;
+	txq->sw_ring[TX_BD(pkt_prod, txq)] = m0;
 
-		txq->sw_ring[TX_BD(pkt_prod, txq)] = m0;
+	tx_start_bd = &txq->tx_ring[TX_BD(bd_prod, txq)].start_bd;
 
-		tx_start_bd = &txq->tx_ring[TX_BD(bd_prod, txq)].start_bd;
+	tx_start_bd->addr =
+	    rte_cpu_to_le_64(rte_mbuf_data_dma_addr(m0));
+	tx_start_bd->nbytes = rte_cpu_to_le_16(m0->data_len);
+	tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
+	tx_start_bd->general_data =
+	    (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT);
 
-		tx_start_bd->addr =
-		    rte_cpu_to_le_64(rte_mbuf_data_dma_addr(m0));
-		tx_start_bd->nbytes = rte_cpu_to_le_16(m0->data_len);
-		tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
-		tx_start_bd->general_data =
-		    (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT);
+	tx_start_bd->nbd = rte_cpu_to_le_16(2);
 
-		tx_start_bd->nbd = rte_cpu_to_le_16(2);
+	if (m0->ol_flags & PKT_TX_VLAN_PKT) {
+		tx_start_bd->vlan_or_ethertype =
+		    rte_cpu_to_le_16(m0->vlan_tci);
+		tx_start_bd->bd_flags.as_bitfield |=
+		    (X_ETH_OUTBAND_VLAN <<
+		     ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
+	} else {
+		if (IS_PF(sc))
+			tx_start_bd->vlan_or_ethertype =
+			    rte_cpu_to_le_16(pkt_prod);
+		else {
+			struct ether_hdr *eh =
+			    rte_pktmbuf_mtod(m0, struct ether_hdr *);
 
-		if (m0->ol_flags & PKT_TX_VLAN_PKT) {
 			tx_start_bd->vlan_or_ethertype =
-			    rte_cpu_to_le_16(m0->vlan_tci);
-			tx_start_bd->bd_flags.as_bitfield |=
-			    (X_ETH_OUTBAND_VLAN <<
-			     ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
-		} else {
-			if (IS_PF(sc))
-				tx_start_bd->vlan_or_ethertype =
-				    rte_cpu_to_le_16(pkt_prod);
-			else {
-				struct ether_hdr *eh
-				    = rte_pktmbuf_mtod(m0, struct ether_hdr *);
-
-				tx_start_bd->vlan_or_ethertype
-				    = rte_cpu_to_le_16(rte_be_to_cpu_16(eh->ether_type));
-			}
+			    rte_cpu_to_le_16(rte_be_to_cpu_16(eh->ether_type));
 		}
+	}
 
-		bd_prod = NEXT_TX_BD(bd_prod);
-		if (IS_VF(sc)) {
-			struct eth_tx_parse_bd_e2 *tx_parse_bd;
-			const struct ether_hdr *eh = rte_pktmbuf_mtod(m0, struct ether_hdr *);
-			uint8_t mac_type = UNICAST_ADDRESS;
-
-			tx_parse_bd =
-			    &txq->tx_ring[TX_BD(bd_prod, txq)].parse_bd_e2;
-			if (is_multicast_ether_addr(&eh->d_addr)) {
-				if (is_broadcast_ether_addr(&eh->d_addr))
-					mac_type = BROADCAST_ADDRESS;
-				else
-					mac_type = MULTICAST_ADDRESS;
-			}
-			tx_parse_bd->parsing_data =
-			    (mac_type << ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT);
-
-			rte_memcpy(&tx_parse_bd->data.mac_addr.dst_hi,
-				   &eh->d_addr.addr_bytes[0], 2);
-			rte_memcpy(&tx_parse_bd->data.mac_addr.dst_mid,
-				   &eh->d_addr.addr_bytes[2], 2);
-			rte_memcpy(&tx_parse_bd->data.mac_addr.dst_lo,
-				   &eh->d_addr.addr_bytes[4], 2);
-			rte_memcpy(&tx_parse_bd->data.mac_addr.src_hi,
-				   &eh->s_addr.addr_bytes[0], 2);
-			rte_memcpy(&tx_parse_bd->data.mac_addr.src_mid,
-				   &eh->s_addr.addr_bytes[2], 2);
-			rte_memcpy(&tx_parse_bd->data.mac_addr.src_lo,
-				   &eh->s_addr.addr_bytes[4], 2);
-
-			tx_parse_bd->data.mac_addr.dst_hi =
-			    rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_hi);
-			tx_parse_bd->data.mac_addr.dst_mid =
-			    rte_cpu_to_be_16(tx_parse_bd->data.
-					     mac_addr.dst_mid);
-			tx_parse_bd->data.mac_addr.dst_lo =
-			    rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_lo);
-			tx_parse_bd->data.mac_addr.src_hi =
-			    rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_hi);
-			tx_parse_bd->data.mac_addr.src_mid =
-			    rte_cpu_to_be_16(tx_parse_bd->data.
-					     mac_addr.src_mid);
-			tx_parse_bd->data.mac_addr.src_lo =
-			    rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_lo);
-
-			PMD_TX_LOG(DEBUG,
-				   "PBD dst %x %x %x src %x %x %x p_data %x",
-				   tx_parse_bd->data.mac_addr.dst_hi,
-				   tx_parse_bd->data.mac_addr.dst_mid,
-				   tx_parse_bd->data.mac_addr.dst_lo,
-				   tx_parse_bd->data.mac_addr.src_hi,
-				   tx_parse_bd->data.mac_addr.src_mid,
-				   tx_parse_bd->data.mac_addr.src_lo,
-				   tx_parse_bd->parsing_data);
-		}
+	bd_prod = NEXT_TX_BD(bd_prod);
+	if (IS_VF(sc)) {
+		struct eth_tx_parse_bd_e2 *tx_parse_bd;
+		const struct ether_hdr *eh =
+		    rte_pktmbuf_mtod(m0, struct ether_hdr *);
+		uint8_t mac_type = UNICAST_ADDRESS;
+
+		tx_parse_bd =
+		    &txq->tx_ring[TX_BD(bd_prod, txq)].parse_bd_e2;
+		if (is_multicast_ether_addr(&eh->d_addr)) {
+			if (is_broadcast_ether_addr(&eh->d_addr))
+				mac_type = BROADCAST_ADDRESS;
+			else
+				mac_type = MULTICAST_ADDRESS;
+		}
+		tx_parse_bd->parsing_data =
+		    (mac_type << ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT);
+
+		rte_memcpy(&tx_parse_bd->data.mac_addr.dst_hi,
+			   &eh->d_addr.addr_bytes[0], 2);
+		rte_memcpy(&tx_parse_bd->data.mac_addr.dst_mid,
+			   &eh->d_addr.addr_bytes[2], 2);
+		rte_memcpy(&tx_parse_bd->data.mac_addr.dst_lo,
+			   &eh->d_addr.addr_bytes[4], 2);
+		rte_memcpy(&tx_parse_bd->data.mac_addr.src_hi,
+			   &eh->s_addr.addr_bytes[0], 2);
+		rte_memcpy(&tx_parse_bd->data.mac_addr.src_mid,
+			   &eh->s_addr.addr_bytes[2], 2);
+		rte_memcpy(&tx_parse_bd->data.mac_addr.src_lo,
+			   &eh->s_addr.addr_bytes[4], 2);
+
+		tx_parse_bd->data.mac_addr.dst_hi =
+		    rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_hi);
+		tx_parse_bd->data.mac_addr.dst_mid =
+		    rte_cpu_to_be_16(tx_parse_bd->data.
+				     mac_addr.dst_mid);
+		tx_parse_bd->data.mac_addr.dst_lo =
+		    rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_lo);
+		tx_parse_bd->data.mac_addr.src_hi =
+		    rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_hi);
+		tx_parse_bd->data.mac_addr.src_mid =
+		    rte_cpu_to_be_16(tx_parse_bd->data.
+				     mac_addr.src_mid);
+		tx_parse_bd->data.mac_addr.src_lo =
+		    rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_lo);
 
 		PMD_TX_LOG(DEBUG,
-			   "start bd: nbytes %d flags %x vlan %x\n",
-			   tx_start_bd->nbytes,
-			   tx_start_bd->bd_flags.as_bitfield,
-			   tx_start_bd->vlan_or_ethertype);
+			   "PBD dst %x %x %x src %x %x %x p_data %x",
+			   tx_parse_bd->data.mac_addr.dst_hi,
+			   tx_parse_bd->data.mac_addr.dst_mid,
+			   tx_parse_bd->data.mac_addr.dst_lo,
+			   tx_parse_bd->data.mac_addr.src_hi,
+			   tx_parse_bd->data.mac_addr.src_mid,
+			   tx_parse_bd->data.mac_addr.src_lo,
+			   tx_parse_bd->parsing_data);
+	}
 
-		bd_prod = NEXT_TX_BD(bd_prod);
-		pkt_prod++;
+	PMD_TX_LOG(DEBUG,
+		   "start bd: nbytes %d flags %x vlan %x\n",
+		   tx_start_bd->nbytes,
+		   tx_start_bd->bd_flags.as_bitfield,
+		   tx_start_bd->vlan_or_ethertype);
 
-		if (TX_IDX(bd_prod) < 2) {
-			nbds++;
-		}
-	}
+	bd_prod = NEXT_TX_BD(bd_prod);
+	pkt_prod++;
+
+	if (TX_IDX(bd_prod) < 2)
+		nbds++;
 
-	txq->nb_tx_avail -= m_pkts << 1;
+	txq->nb_tx_avail -= 2;
 	txq->tx_bd_tail = bd_prod;
 	txq->tx_pkt_tail = pkt_prod;
 
-	mb();
-	fp->tx_db.data.prod += (m_pkts << 1) + nbds;
-	DOORBELL(sc, txq->queue_id, fp->tx_db.raw);
-	mb();
+	return nbds + 2;
 }
 
 static uint16_t bnx2x_cid_ilt_lines(struct bnx2x_softc *sc)
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 42700e7..c24a530 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1866,8 +1866,7 @@ int bnx2x_alloc_hsi_mem(struct bnx2x_softc *sc);
 int bnx2x_alloc_ilt_mem(struct bnx2x_softc *sc);
 void bnx2x_free_ilt_mem(struct bnx2x_softc *sc);
 void bnx2x_dump_tx_chain(struct bnx2x_fastpath * fp, int bd_prod, int count);
-void bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf **m_head,
-	int m_pkts);
+int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0);
 uint8_t bnx2x_txeof(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp);
 void bnx2x_print_adapter_info(struct bnx2x_softc *sc);
 int bnx2x_intr_legacy(struct bnx2x_softc *sc, int scan_fp);
diff --git a/drivers/net/bnx2x/bnx2x_rxtx.c b/drivers/net/bnx2x/bnx2x_rxtx.c
index 3e1f83b..8bcb431 100644
--- a/drivers/net/bnx2x/bnx2x_rxtx.c
+++ b/drivers/net/bnx2x/bnx2x_rxtx.c
@@ -222,10 +222,9 @@ bnx2x_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	struct bnx2x_tx_queue *txq;
 	struct bnx2x_softc *sc;
 	struct bnx2x_fastpath *fp;
-	uint32_t burst;
-	struct rte_mbuf **m = tx_pkts;
 	uint16_t nb_tx_pkts;
 	uint16_t nb_pkt_sent = 0;
+	uint32_t ret;
 
 	txq = p_txq;
 	sc = txq->sc;
@@ -239,19 +238,23 @@ bnx2x_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	if (unlikely(nb_tx_pkts == 0))
 		return 0;
 
-	burst = RTE_MIN(nb_tx_pkts, RTE_PMD_BNX2X_TX_MAX_BURST);
-
 	while (nb_tx_pkts--) {
+		struct rte_mbuf *m = *tx_pkts++;
 		assert(m != NULL);
-		bnx2x_tx_encap(txq, m, burst);
-		bnx2x_update_fp_sb_idx(fp);
-		if ((txq->nb_tx_desc - txq->nb_tx_avail) >
-					txq->tx_free_thresh)
-			bnx2x_txeof(sc, fp);
-		m += burst;
+		ret = bnx2x_tx_encap(txq, m);
+		fp->tx_db.data.prod += ret;
 		nb_pkt_sent++;
 	}
 
+	bnx2x_update_fp_sb_idx(fp);
+	mb();
+	DOORBELL(sc, txq->queue_id, fp->tx_db.raw);
+	mb();
+
+	if ((txq->nb_tx_desc - txq->nb_tx_avail) >
+				txq->tx_free_thresh)
+		bnx2x_txeof(sc, fp);
+
 	return nb_pkt_sent;
 }
 
-- 
1.7.10.3



More information about the dev mailing list