[dpdk-dev] [PATCH] The VMXNET3 PMD can't receive packet suddenly after a lot of traffic coming in. The root cause is due to mbuf allocation fail in vmxnet3_post_rx_bufs() and there is no error handling when it is called from vmxnet3_recv_pkts(). Th...

Vithal S Mohare vmohare at arubanetworks.com
Thu Jul 23 09:31:01 CEST 2015


How about the changes below? I have been using them and they resolve the issue.

===============================================================================

===== dpdk/lib/librte_pmd_vmxnet3/vmxnet3_ring.h#3 edit (text) =====  

@@ -155,10 +155,11 @@ typedef struct vmxnet3_tx_queue {
 struct vmxnet3_rxq_stats {
 	uint64_t                     drop_total;
 	uint64_t                     drop_err;
 	uint64_t                     drop_fcs;
 	uint64_t                     rx_buf_alloc_failure;
+	uint64_t                     rx_buf_replenish;
 };
 
 typedef struct vmxnet3_rx_queue {
 	struct rte_mempool          *mp;
 	struct vmxnet3_hw           *hw;

===== dpdk/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c#5 edit (text) =====  

@@ -645,10 +645,32 @@ rcd_done:
 				break;
 			}
 		}
 	}
 
+	/* VMXNET3
+	 * In the loop above, vmxnet3_post_rx_bufs() fails if all mbufs are
+	 * currently allocated.  When the hw device is left without any 'rx'
+	 * descriptors, packets from the network will not be DMA'd to the
+	 * driver, while the only path that refreshes 'rxd's back to hw is the
+	 * one above, i.e. when a packet is received from hw.  So there is a
+	 * potential deadlock.
+	 *
+	 * To break the deadlock, vmxnet3_post_rx_bufs() is triggered below
+	 * whenever the poll finds no 'rcd'.  vmxnet3_post_rx_bufs() is a
+	 * no-op if all the descriptors are already posted to hw, so the extra
+	 * cost on the normal path is a single gen-bit compare.
+	 */
+
+	if (rcd->gen != rxq->comp_ring.gen) {
+		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
+		if (vmxnet3_post_rx_bufs(rxq, ring_idx) > 0) {
+			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
+				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
+						       rxq->cmd_ring[ring_idx].next2fill);
+			}
+			rxq->stats.rx_buf_replenish++;
+		}
+	}
+
 	return (nb_rx);
 }

===============================================================================
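
A note on the check that gates the new block: VMXNET3 completion rings use a
generation bit for ownership, so "rcd->gen != rxq->comp_ring.gen" means the
poll came up empty, which is exactly the case where the rx ring may have
drained with no completion left to drive the normal refill path.  A minimal
illustration of the scheme, with generic names of my own rather than the
driver's:

/* Toy completion ring with a generation bit.  The consumer flips its
 * expected gen on wrap, so a stale descriptor (gen mismatch) reads as
 * "nothing new yet" without any head/tail exchange with the device.
 */
struct comp_desc {
	unsigned int gen;
	/* payload fields would live here */
};

struct comp_ring {
	struct comp_desc *base;
	unsigned int size;
	unsigned int next2proc;
	unsigned int gen;
};

/* Return the next completed descriptor, or NULL on an empty poll:
 * the rcd->gen != comp_ring.gen case the change above keys off.
 */
static struct comp_desc *
comp_ring_poll(struct comp_ring *r)
{
	struct comp_desc *d = &r->base[r->next2proc];

	if (d->gen != r->gen)
		return NULL;	/* empty poll */
	if (++r->next2proc == r->size) {
		r->next2proc = 0;
		r->gen ^= 1;	/* flip expected gen on wrap */
	}
	return d;
}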

-----Original Message-----
From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Stephen Hemminger
Sent: 23 July 2015 AM 10:58
To: mac_leehk at yahoo.com.hk
Cc: dev at dpdk.org
Subject: Re: [dpdk-dev] [PATCH] The VMXNET3 PMD can't receive packet suddenly after a lot of traffic coming in. The root cause is due to mbuf allocation fail in vmxnet3_post_rx_bufs() and there is no error handling when it is called from vmxnet3_recv_pkts(). Th...

On Thu, 23 Jul 2015 09:48:55 +0800
mac_leehk at yahoo.com.hk wrote:

> From: marco <marco at ubuntu.(none)>

Thank you for addressing a real bug. 

But there are several issues with the patch as submitted:

 * the standard way to handle allocation failure in network drivers is to drop the
   received packet and reuse the available data buffer (mbuf) for the next packet;
   see the sketch after these comments.  It looks like your code would just stop
   receiving, which could cause a deadlock.

 * the mail is formatted in a manner that is incompatible with merging into git.
   All submissions should have a short < 60 character Subject with a summary
   followed by a description.  I don't know what mail client you used, but
   everything is smashed into the Subject.

 * all patches require a Signed-off-by with a real name for the Developer's Certificate of Origin

 * the style is wrong; indentation is a mess. Please indent with tabs, not spaces.

 * avoid extra comments; in code, too many comments are often worse than too few.


Please rework your patch and resubmit it.
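
For reference, here is a minimal, self-contained sketch of that drop-and-reuse
policy, deliberately independent of the driver (every name below is
illustrative; buf_alloc() stands in for rte_rxmbuf_alloc()).  Each rx
descriptor owns one buffer; on allocation failure the packet is dropped and
the old buffer stays posted, so the ring never drains and the device keeps
receiving:

#include <stdio.h>
#include <stdlib.h>

#define RING_SIZE 4

struct desc {
	char *buf;	/* buffer currently posted to "hw" */
};

static struct desc ring[RING_SIZE];
static unsigned long delivered, drops;

/* Stand-in for rte_rxmbuf_alloc(): fails once the "pool" is empty. */
static char *buf_alloc(int *pool)
{
	if (*pool <= 0)
		return NULL;
	(*pool)--;
	return malloc(64);
}

/* Receive one packet on descriptor d.  On allocation failure the packet
 * is dropped and d->buf is left posted unchanged; the ring stays full.
 */
static void rx_one(struct desc *d, int *pool)
{
	char *fresh = buf_alloc(pool);

	if (fresh == NULL) {
		drops++;	/* drop; reuse d->buf for the next packet */
		return;
	}
	free(d->buf);		/* pretend the app consumed the old buffer */
	d->buf = fresh;		/* repost a fresh buffer in its place */
	delivered++;
}

int main(void)
{
	int pool = 6;		/* deliberately undersized buffer pool */
	unsigned int i;

	for (i = 0; i < RING_SIZE; i++)
		ring[i].buf = buf_alloc(&pool);

	for (i = 0; i < 8; i++)	/* eight incoming packets */
		rx_one(&ring[i % RING_SIZE], &pool);

	/* Every descriptor still holds a buffer: no deadlock, only drops. */
	printf("delivered=%lu drops=%lu\n", delivered, drops);
	return 0;
}

In the real driver the consumed mbufs eventually return to the mempool, so
delivery resumes once the application frees them; the point is only that the
rx ring itself never runs dry.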

>  drivers/net/vmxnet3/vmxnet3_rxtx.c |   54 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 53 insertions(+), 1 deletion(-)
>  mode change 100644 => 100755 drivers/net/vmxnet3/vmxnet3_rxtx.c
> 
> diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
> old mode 100644
> new mode 100755
> index 39ad6ef..d560bbb
> --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
> +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
> @@ -421,6 +421,51 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	return nb_tx;
>  }
>  
> +static inline void
> +vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,struct rte_mbuf *mbuf) {
> +	uint32_t  val = 0;
> +	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
> +
> +	struct Vmxnet3_RxDesc *rxd;
> +	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
> +
> +	rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
> +
> +	if (ring->rid == 0) {
> +	/* Usually: One HEAD type buf per packet
> +	 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
> +	 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
> +	 */
> +
> +	/* We use single packet buffer so all heads here */
> +		val = VMXNET3_RXD_BTYPE_HEAD;
> +	} else {
> +	/* All BODY type buffers for 2nd ring; which won't be used at all by ESXi */
> +		val = VMXNET3_RXD_BTYPE_BODY;
> +	}
> +
> +	/*
> +	 * Load mbuf pointer into buf_info[ring_size]
> +	 * buf_info structure is equivalent to cookie for virtio-virtqueue
> +	 */
> +	buf_info->m = mbuf;
> +	buf_info->len = (uint16_t)(mbuf->buf_len -
> +					   RTE_PKTMBUF_HEADROOM);
> +	buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
> +
> +	/* Load Rx Descriptor with the buffer's GPA */
> +	rxd->addr = buf_info->bufPA;
> +
> +	/* After this point rxd->addr MUST not be NULL */
> +	rxd->btype = val;
> +	rxd->len = buf_info->len;
> +	/* Flip gen bit at the end to change ownership */
> +	rxd->gen = ring->gen;
> +
> +	vmxnet3_cmd_ring_adv_next2fill(ring);
> +
> +}
>  /*
>   *  Allocates mbufs and clusters. Post rx descriptors with buffer details
>   *  so that device can receive packets in those buffers.
> @@ -575,8 +620,15 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
>  	}
>  
>  	while (rcd->gen == rxq->comp_ring.gen) {
> +	        struct rte_mbuf *rep;
>  		if (nb_rx >= nb_pkts)
>  			break;
> +		
> +		rep = rte_rxmbuf_alloc(rxq->mp);
> +        	if (rep == NULL) {
> +            		rxq->stats.rx_buf_alloc_failure++;
> +            		break;
> +        	}
>  
>  		idx = rcd->rxdIdx;
>  		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
> @@ -657,7 +709,7 @@ rcd_done:
>  		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
>  
>  		/* It's time to allocate some new buf and renew descriptors */
> -		vmxnet3_post_rx_bufs(rxq, ring_idx);
> +		vmxnet3_renew_desc(rxq, ring_idx,rep);
>  		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
>  			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
>  					       rxq->cmd_ring[ring_idx].next2fill);


