[dpdk-dev] [PATCH v2 5/6] vmxnet3: Perf improvement on the rx path

Yong Wang yongwang at vmware.com
Wed Nov 5 02:49:42 CET 2014


This patch includes two small performance optimizations
on the rx path:

(1) It adds unlikely hints on various infrequent error
paths to the compiler to make branch prediction more
efficient.

(2) It also moves a constant assignment out of the pkt
polling loop.  This saves one branching per packet.

Performance evaluation configs:
- On the DPDK-side, it's running some l3 forwarding app
inside a VM on ESXi with one core assigned for polling.
- On the client side, pktgen/dpdk is used to generate
64B tcp packets at line rate (14.8M PPS).

Performance results on a Nehalem box (4cores at 2.8GHzx2)
shown below.  CPU usage is collected factoring out the
idle loop cost.
- Before the patch, ~900K PPS with 65% CPU of a core
used for DPDK.
- After the patch, only 45% of a core used, while
maintaining the same packet rate.

Signed-off-by: Yong Wang <yongwang at vmware.com>
---
 lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c | 242 ++++++++++++++++------------------
 1 file changed, 116 insertions(+), 126 deletions(-)

diff --git a/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c b/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
index e2fb8a8..4799f4d 100644
--- a/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
+++ b/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
@@ -451,6 +451,19 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
 	uint32_t i = 0, val = 0;
 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
 
+	if (ring_id == 0) {
+		/* Usually: One HEAD type buf per packet
+		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
+		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
+		 */
+
+		/* We use single packet buffer so all heads here */
+		val = VMXNET3_RXD_BTYPE_HEAD;
+	} else {
+		/* All BODY type buffers for 2nd ring */
+		val = VMXNET3_RXD_BTYPE_BODY;
+	}
+
 	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
 		struct Vmxnet3_RxDesc *rxd;
 		struct rte_mbuf *mbuf;
@@ -458,22 +471,9 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
 
 		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
 
-		if (ring->rid == 0) {
-			/* Usually: One HEAD type buf per packet
-			 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
-			 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
-			 */
-
-			/* We use single packet buffer so all heads here */
-			val = VMXNET3_RXD_BTYPE_HEAD;
-		} else {
-			/* All BODY type buffers for 2nd ring; which won't be used at all by ESXi */
-			val = VMXNET3_RXD_BTYPE_BODY;
-		}
-
 		/* Allocate blank mbuf for the current Rx Descriptor */
 		mbuf = rte_rxmbuf_alloc(rxq->mp);
-		if (mbuf == NULL) {
+		if (unlikely(mbuf == NULL)) {
 			PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
 			rxq->stats.rx_buf_alloc_failure++;
 			err = ENOMEM;
@@ -536,151 +536,141 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
 
-	if (rxq->stopped) {
+	if (unlikely(rxq->stopped)) {
 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
 		return 0;
 	}
 
 	while (rcd->gen == rxq->comp_ring.gen) {
-
 		if (nb_rx >= nb_pkts)
 			break;
+
 		idx = rcd->rxdIdx;
 		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
 
-		if (rcd->sop != 1 || rcd->eop != 1) {
+		if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
 			rte_pktmbuf_free_seg(rbi->m);
-
 			PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n)");
 			goto rcd_done;
+		}
 
-		} else {
-
-			PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
+		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
 
 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-			VMXNET3_ASSERT(rcd->len <= rxd->len);
-			VMXNET3_ASSERT(rbi->m);
+		VMXNET3_ASSERT(rcd->len <= rxd->len);
+		VMXNET3_ASSERT(rbi->m);
 #endif
-			if (rcd->len == 0) {
-				PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)",
-					   ring_idx, idx);
+		if (unlikely(rcd->len == 0)) {
+			PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)",
+				   ring_idx, idx);
 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-				VMXNET3_ASSERT(rcd->sop && rcd->eop);
+			VMXNET3_ASSERT(rcd->sop && rcd->eop);
 #endif
-				rte_pktmbuf_free_seg(rbi->m);
-
-				goto rcd_done;
-			}
+			rte_pktmbuf_free_seg(rbi->m);
+			goto rcd_done;
+		}
 
-			/* Assuming a packet is coming in a single packet buffer */
-			if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
-				PMD_RX_LOG(DEBUG,
-					   "Alert : Misbehaving device, incorrect "
-					   " buffer type used. iPacket dropped.");
-				rte_pktmbuf_free_seg(rbi->m);
-				goto rcd_done;
-			}
+		/* Assuming a packet is coming in a single packet buffer */
+		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
+			PMD_RX_LOG(DEBUG,
+				   "Alert : Misbehaving device, incorrect "
+				   " buffer type used. iPacket dropped.");
+			rte_pktmbuf_free_seg(rbi->m);
+			goto rcd_done;
+		}
 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-			VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
+		VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
 #endif
-			/* Get the packet buffer pointer from buf_info */
-			rxm = rbi->m;
-
-			/* Clear descriptor associated buf_info to be reused */
-			rbi->m = NULL;
-			rbi->bufPA = 0;
-
-			/* Update the index that we received a packet */
-			rxq->cmd_ring[ring_idx].next2comp = idx;
-
-			/* For RCD with EOP set, check if there is frame error */
-			if (rcd->err) {
-				rxq->stats.drop_total++;
-				rxq->stats.drop_err++;
-
-				if (!rcd->fcs) {
-					rxq->stats.drop_fcs++;
-					PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
-				}
-				PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
-					   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
-						 rxq->comp_ring.base), rcd->rxdIdx);
-				rte_pktmbuf_free_seg(rxm);
-
-				goto rcd_done;
-			}
+		/* Get the packet buffer pointer from buf_info */
+		rxm = rbi->m;
 
-			/* Check for hardware stripped VLAN tag */
-			if (rcd->ts) {
-				PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
-					   rcd->tci);
-				rxm->ol_flags = PKT_RX_VLAN_PKT;
-#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-				VMXNET3_ASSERT(rxm &&
-					       rte_pktmbuf_mtod(rxm, void *));
-#endif
-				/* Copy vlan tag in packet buffer */
-				rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
-			} else {
-				rxm->ol_flags = 0;
-				rxm->vlan_tci = 0;
-			}
+		/* Clear descriptor associated buf_info to be reused */
+		rbi->m = NULL;
+		rbi->bufPA = 0;
 
-			/* Initialize newly received packet buffer */
-			rxm->port = rxq->port_id;
-			rxm->nb_segs = 1;
-			rxm->next = NULL;
-			rxm->pkt_len = (uint16_t)rcd->len;
-			rxm->data_len = (uint16_t)rcd->len;
-			rxm->port = rxq->port_id;
-			rxm->data_off = RTE_PKTMBUF_HEADROOM;
-
-			/* Check packet types, rx checksum errors, etc. Only support IPv4 so far. */
-			if (rcd->v4) {
-				struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
-				struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
-
-				if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
-					rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
-				else
-					rxm->ol_flags |= PKT_RX_IPV4_HDR;
-
-				if (!rcd->cnc) {
-					if (!rcd->ipc)
-						rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-
-					if ((rcd->tcp || rcd->udp) && !rcd->tuc)
-						rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-				}
-			}
+		/* Update the index that we received a packet */
+		rxq->cmd_ring[ring_idx].next2comp = idx;
 
-			rx_pkts[nb_rx++] = rxm;
-rcd_done:
-			rxq->cmd_ring[ring_idx].next2comp = idx;
-			VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
+		/* For RCD with EOP set, check if there is frame error */
+		if (unlikely(rcd->err)) {
+			rxq->stats.drop_total++;
+			rxq->stats.drop_err++;
 
-			/* It's time to allocate some new buf and renew descriptors */
-			vmxnet3_post_rx_bufs(rxq, ring_idx);
-			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
-				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
-						       rxq->cmd_ring[ring_idx].next2fill);
+			if (!rcd->fcs) {
+				rxq->stats.drop_fcs++;
+				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
 			}
+			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
+				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
+					 rxq->comp_ring.base), rcd->rxdIdx);
+			rte_pktmbuf_free_seg(rxm);
+			goto rcd_done;
+		}
 
-			/* Advance to the next descriptor in comp_ring */
-			vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
+		/* Check for hardware stripped VLAN tag */
+		if (rcd->ts) {
+			PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
+				   rcd->tci);
+			rxm->ol_flags = PKT_RX_VLAN_PKT;
+			/* Copy vlan tag in packet buffer */
+			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
+		} else {
+			rxm->ol_flags = 0;
+			rxm->vlan_tci = 0;
+		}
 
-			rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
-			nb_rxd++;
-			if (nb_rxd > rxq->cmd_ring[0].size) {
-				PMD_RX_LOG(ERR,
-					   "Used up quota of receiving packets,"
-					   " relinquish control.");
-				break;
+		/* Initialize newly received packet buffer */
+		rxm->port = rxq->port_id;
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = (uint16_t)rcd->len;
+		rxm->data_len = (uint16_t)rcd->len;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+		/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
+		if (rcd->v4) {
+			struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
+			struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
+
+			if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
+				rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
+			else
+				rxm->ol_flags |= PKT_RX_IPV4_HDR;
+
+			if (!rcd->cnc) {
+				if (!rcd->ipc)
+					rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+
+				if ((rcd->tcp || rcd->udp) && !rcd->tuc)
+					rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
 			}
 		}
+
+		rx_pkts[nb_rx++] = rxm;
+rcd_done:
+		rxq->cmd_ring[ring_idx].next2comp = idx;
+		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
+
+		/* It's time to allocate some new buf and renew descriptors */
+		vmxnet3_post_rx_bufs(rxq, ring_idx);
+		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
+			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
+					       rxq->cmd_ring[ring_idx].next2fill);
+		}
+
+		/* Advance to the next descriptor in comp_ring */
+		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
+
+		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
+		nb_rxd++;
+		if (nb_rxd > rxq->cmd_ring[0].size) {
+			PMD_RX_LOG(ERR,
+				   "Used up quota of receiving packets,"
+				   " relinquish control.");
+			break;
+		}
 	}
 
 	return nb_rx;
-- 
1.9.1



More information about the dev mailing list